]> source.dussan.org Git - rspamd.git/commitdiff
[Rework] Move and adopt entities handling logic
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Fri, 21 May 2021 08:44:16 +0000 (09:44 +0100)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Fri, 21 May 2021 08:44:16 +0000 (09:44 +0100)
src/libserver/CMakeLists.txt
src/libserver/html/html.cc [deleted file]
src/libserver/html/html.cxx [new file with mode: 0644]
src/libserver/html/html_entities.cxx [new file with mode: 0644]
src/libserver/html/html_entities.hxx
src/libserver/html/html_tag_defs.hxx

index e8267292cec3b65b75fba3fc6804c58343ff9bd0..3a4bae81f7234ebfe40967d2f66a8c9639d00589 100644 (file)
@@ -34,7 +34,8 @@ SET(LIBRSPAMDSERVERSRC
                                ${CMAKE_CURRENT_SOURCE_DIR}/http/http_context.c
                                ${CMAKE_CURRENT_SOURCE_DIR}/maps/map.c
                                ${CMAKE_CURRENT_SOURCE_DIR}/maps/map_helpers.c
-                               ${CMAKE_CURRENT_SOURCE_DIR}/html/html.cc
+                               ${CMAKE_CURRENT_SOURCE_DIR}/html/html_entities.cxx
+                               ${CMAKE_CURRENT_SOURCE_DIR}/html/html.cxx
                                ${LIBCSSSRC})
 
 # Librspamd-server
diff --git a/src/libserver/html/html.cc b/src/libserver/html/html.cc
deleted file mode 100644 (file)
index e650cc3..0000000
+++ /dev/null
@@ -1,3137 +0,0 @@
-/*-
- * Copyright 2016 Vsevolod Stakhov
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "config.h"
-#include "util.h"
-#include "rspamd.h"
-#include "message.h"
-#include "html.h"
-#include "html_tags.h"
-#include "html_colors.h"
-
-#include "url.h"
-#include "contrib/libucl/khash.h"
-#include "libmime/images.h"
-#include "css/css.h"
-#include "libutil/cxx/utf8_util.h"
-
-#include "html_tag_defs.hxx"
-#include "html_entities.hxx"
-
-#include <vector>
-
-#include <unicode/uversion.h>
-#include <unicode/ucnv.h>
-#if U_ICU_VERSION_MAJOR_NUM >= 46
-#include <unicode/uidna.h>
-#endif
-
-namespace rspamd::html {
-
-static const guint max_tags = 8192; /* Ignore tags if this maximum is reached */
-/*
- * Module-local debug logging helper. The expansion reads `pool->tag.uid`,
- * so a variable named `pool` must be in scope wherever the macro is used.
- */
-#define msg_debug_html(...)  rspamd_conditional_debug_fast (NULL, NULL, \
-        rspamd_html_log_id, "html", pool->tag.uid, \
-        G_STRFUNC, \
-        __VA_ARGS__)
-
-INIT_LOG_MODULE(html)
-
-
-[[maybe_unused]] static const html_tags_storage html_tags_defs;
-[[maybe_unused]] static const html_entities_storage html_entities_defs;
-
-static struct rspamd_url *rspamd_html_process_url(rspamd_mempool_t *pool,
-                                                                                                 const gchar *start, guint len,
-                                                                                                 struct html_tag_component *comp);
-/*
- * Pairs a closing tag with the nearest still-open ancestor that has the same id.
- *
- * If the tag stored in `node` has FL_CLOSING set, the parent chain is walked
- * upwards for as long as the ancestors carry tag data. The first ancestor with
- * the same id and without FL_CLOSED gets FL_CLOSED set, `node` itself is
- * destroyed with g_node_destroy() and `*cur_level` is moved to that ancestor's
- * parent; TRUE is returned. Since `node` is destroyed on this path, the caller
- * must not touch it afterwards.
- *
- * TRUE is also returned when the tag is not a closing one (nothing to pair);
- * FALSE means that no matching open ancestor was found.
- */
-static gboolean
-rspamd_html_check_balance(GNode *node, GNode **cur_level)
-{
-       struct html_tag *arg = node->data, *tmp;
-       GNode *cur;
-
-       if (arg->flags & FL_CLOSING) {
-               /* First of all check whether this tag is closing tag for parent node */
-               cur = node->parent;
-               while (cur && cur->data) {
-                       tmp = cur->data;
-                       if (tmp->id == arg->id &&
-                               (tmp->flags & FL_CLOSED) == 0) {
-                               tmp->flags |= FL_CLOSED;
-                               /* Destroy current node as we find corresponding parent node */
-                               g_node_destroy(node);
-                               /* Change level */
-                               *cur_level = cur->parent;
-                               return TRUE;
-                       }
-                       cur = cur->parent;
-               }
-       }
-       else {
-               return TRUE;
-       }
-
-       return FALSE;
-}
-/*
- * Looks up a tag by its name in the `html_tag_by_name` hash.
- * Returns the id stored for that name, or -1 when the name is not present.
- */
-gint
-rspamd_html_tag_by_name(const gchar *name) {
-       khiter_t k;
-
-       k = kh_get (tag_by_name, html_tag_by_name, name);
-
-       if (k != kh_end (html_tag_by_name)) {
-               return kh_val (html_tag_by_name, k).id;
-       }
-
-       return -1;
-}
-/*
- * Tells whether the tag called `tagname` is marked in `hc->tags_seen`.
- * Both `hc` and `hc->tags_seen` must be non-NULL (asserted). The name is
- * resolved with rspamd_html_tag_by_name(); an unknown name yields FALSE,
- * otherwise the result of isset() for the resolved id is returned.
- */
-gboolean
-rspamd_html_tag_seen(struct html_content *hc, const gchar *tagname) {
-       gint id;
-
-       g_assert (hc != NULL);
-       g_assert (hc->tags_seen != NULL);
-
-       id = rspamd_html_tag_by_name(tagname);
-
-       if (id != -1) {
-               return isset (hc->tags_seen, id);
-       }
-
-       return FALSE;
-}
-/*
- * Looks up a tag by its numeric id in the `html_tag_by_id` hash.
- * Returns the name stored for that id, or NULL when the id is not present.
- */
-const gchar *
-rspamd_html_tag_by_id(gint id) {
-       khiter_t k;
-
-       k = kh_get (tag_by_id, html_tag_by_id, id);
-
-       if (k != kh_end (html_tag_by_id)) {
-               return kh_val (html_tag_by_id, k).name;
-       }
-
-       return NULL;
-}
-
-/*
- * Decode HTML entities in text, rewriting the buffer in place.
- *
- * `s` points to `len` bytes that serve as both input and output. The return
- * value is the number of bytes of decoded output now stored at the start of
- * `s` (0 when `len` is 0).
- *
- * A two-state machine is used: state 0 copies ordinary bytes, state 1 is
- * entered on '&' and collects an entity until ';'. Named entities are looked
- * up in `html_entity_by_name`. Numeric entities ("&#...") are parsed with
- * strtoul() in base 16 after 'x'/'X', base 8 after 'o'/'O' and base 10
- * otherwise; the value is looked up in `html_entity_by_number` and, when it
- * is not found there, appended as UTF-8 with U8_APPEND. Entities that cannot
- * be decoded are copied through as they were, provided the space checks
- * against `end` pass. A numeric entity that is ended by a character other
- * than ';' is still decoded once at least one digit has been seen.
- *
- * NOTE(review): for "&#" directly followed by the terminator (the
- * `e + 2 < h` test fails) nothing is emitted and the byte at `h` is left as
- * '\0' - confirm that dropping such input is intended.
- */
-guint
-rspamd_html_decode_entitles_inplace(gchar *s, gsize len) {
-       goffset l, rep_len;
-       gchar *t = s, *h = s, *e = s, *end_ptr, old_c;
-       const gchar *end;
-       const gchar *entity;
-       gboolean seen_hash = FALSE, seen_hex = FALSE;
-       enum {
-               do_undefined,
-               do_digits_only,
-               do_mixed,
-       } seen_digit_only;
-       gint state = 0, base;
-       UChar32 uc;
-       khiter_t k;
-
-       if (len == 0) {
-               return 0;
-       }
-       else {
-               l = len;
-       }
-
-       end = s + l;
-
-       while (h - s < l && t <= h) {
-               switch (state) {
-                       /* Out of entity */
-               case 0:
-                       if (*h == '&') {
-                               state = 1;
-                               seen_hash = FALSE;
-                               seen_hex = FALSE;
-                               seen_digit_only = do_undefined;
-                               e = h;
-                               h++;
-                               continue;
-                       }
-                       else {
-                               *t = *h;
-                               h++;
-                               t++;
-                       }
-                       break;
-               case 1: /* Inside an entity: `e` points at the '&' that opened it */
-                       if (*h == ';' && h > e) {
-decode_entity:
-                               /* Determine base */
-                               /* First find in entities table */
-                               old_c = *h; /* Saved so that the temporary terminator can be undone */
-                               *h = '\0';
-                               entity = e + 1;
-                               uc = 0;
-
-                               if (*entity != '#') {
-                                       k = kh_get (entity_by_name, html_entity_by_name, entity);
-                                       *h = old_c;
-
-                                       if (k != kh_end (html_entity_by_name)) {
-                                               if (kh_val (html_entity_by_name, k)) {
-                                                       rep_len = strlen(kh_val (html_entity_by_name, k));
-
-                                                       if (end - t >= rep_len) {
-                                                               memcpy(t, kh_val (html_entity_by_name, k),
-                                                                               rep_len);
-                                                               t += rep_len;
-                                                       }
-                                               }
-                                               else {
-                                                       if (end - t > h - e + 1) {
-                                                               memmove(t, e, h - e + 1);
-                                                               t += h - e + 1;
-                                                       }
-                                               }
-                                       }
-                                       else {
-                                               if (end - t > h - e + 1) {
-                                                       memmove(t, e, h - e + 1);
-                                                       t += h - e + 1;
-                                               }
-                                       }
-                               }
-                               else if (e + 2 < h) { /* Numeric entity with at least one character after '#' */
-                                       if (*(e + 2) == 'x' || *(e + 2) == 'X') {
-                                               base = 16;
-                                       }
-                                       else if (*(e + 2) == 'o' || *(e + 2) == 'O') {
-                                               base = 8;
-                                       }
-                                       else {
-                                               base = 10;
-                                       }
-
-                                       if (base == 10) {
-                                               uc = strtoul((e + 2), &end_ptr, base);
-                                       }
-                                       else {
-                                               uc = strtoul((e + 3), &end_ptr, base);
-                                       }
-
-                                       if (end_ptr != NULL && *end_ptr != '\0') {
-                                               /* strtoul() stopped before the end of the entity: copy it undecoded */
-                                               *h = old_c;
-
-                                               if (end - t > h - e + 1) {
-                                                       memmove(t, e, h - e + 1);
-                                                       t += h - e + 1;
-                                               }
-                                       }
-                                       else {
-                                               /* Search for a replacement */
-                                               *h = old_c;
-                                               k = kh_get (entity_by_number, html_entity_by_number, uc);
-
-                                               if (k != kh_end (html_entity_by_number)) {
-                                                       if (kh_val (html_entity_by_number, k)) {
-                                                               rep_len = strlen(kh_val (html_entity_by_number, k));
-
-                                                               if (end - t >= rep_len) {
-                                                                       memcpy(t, kh_val (html_entity_by_number, k),
-                                                                                       rep_len);
-                                                                       t += rep_len;
-                                                               }
-                                                       }
-                                                       else {
-                                                               if (end - t > h - e + 1) {
-                                                                       memmove(t, e, h - e + 1);
-                                                                       t += h - e + 1;
-                                                               }
-                                                       }
-                                               }
-                                               else {
-                                                       /* No table replacement: append the code point itself as UTF-8 */
-                                                       goffset off = t - s;
-                                                       UBool is_error = 0;
-
-                                                       if (uc > 0) {
-                                                               U8_APPEND (s, off, len, uc, is_error);
-                                                               if (!is_error) {
-                                                                       t = s + off;
-                                                               }
-                                                               else {
-                                                                       /* Leave invalid entities as is */
-                                                                       if (end - t > h - e + 1) {
-                                                                               memmove(t, e, h - e + 1);
-                                                                               t += h - e + 1;
-                                                                       }
-                                                               }
-                                                       }
-                                                       else if (end - t > h - e + 1) {
-                                                               memmove(t, e, h - e + 1);
-                                                               t += h - e + 1;
-                                                       }
-                                               }
-
-                                               if (end - t > 0 && old_c != ';') {
-                                                       /* The entity was not ended by ';': keep the character that ended it */
-                                                       *t++ = old_c;
-                                               }
-                                       }
-                               }
-
-                               state = 0;
-                       }
-                       else if (*h == '&') {
-                               /* Previous `&` was bogus */
-                               state = 1;
-
-                               if (end - t > h - e) {
-                                       memmove(t, e, h - e);
-                                       t += h - e;
-                               }
-
-                               e = h;
-                       }
-                       else if (*h == '#') {
-                               seen_hash = TRUE;
-
-                               if (h + 1 < end && h[1] == 'x') {
-                                       seen_hex = TRUE;
-                                       /* Skip one more character */
-                                       h++;
-                               }
-                       }
-                       else if (seen_digit_only != do_mixed &&
-                                        (g_ascii_isdigit (*h) || (seen_hex && g_ascii_isxdigit (*h)))) {
-                               seen_digit_only = do_digits_only;
-                       }
-                       else {
-                               if (seen_digit_only == do_digits_only && seen_hash && h > e) {
-                                       /* We have seen some digits, so we can try to decode, eh */
-                                       /* Tolerate numeric entities that lack the terminating ';' */
-                                       goto decode_entity;
-                               }
-
-                               seen_digit_only = do_mixed;
-                       }
-
-                       h++;
-
-                       break;
-               }
-       }
-
-       /* Leftover */
-       if (state == 1 && h > e) {
-               /* Unfinished entity, copy as is */
-               if (end - t >= h - e) {
-                       memmove(t, e, h - e);
-                       t += h - e;
-               }
-       }
-
-       return (t - s);
-}
-/*
- * Checks whether one of the two host tokens looks like a subdomain of the
- * other by comparing them backwards from their last bytes.
- *
- * Trailing dots are skipped in both tokens first. The tokens are then walked
- * towards their beginnings while the bytes match. If `t2` is exhausted first,
- * TRUE is returned when the byte preceding the current position in `t1` is
- * '.'; the symmetric check is done when `t1` is exhausted first. Everything
- * else yields FALSE.
- *
- * NOTE(review): the scan stops as soon as either pointer reaches `begin`, so
- * the byte at `begin` itself is never compared - confirm this is acceptable.
- * NOTE(review): `begin + len - 1` is computed unconditionally; this presumably
- * assumes both tokens are non-empty - verify against the caller.
- */
-static gboolean
-rspamd_url_is_subdomain(rspamd_ftok_t *t1, rspamd_ftok_t *t2) {
-       const gchar *p1, *p2;
-
-       p1 = t1->begin + t1->len - 1;
-       p2 = t2->begin + t2->len - 1;
-
-       /* Skip trailing dots */
-       while (p1 > t1->begin) {
-               if (*p1 != '.') {
-                       break;
-               }
-
-               p1--;
-       }
-
-       while (p2 > t2->begin) {
-               if (*p2 != '.') {
-                       break;
-               }
-
-               p2--;
-       }
-
-       while (p1 > t1->begin && p2 > t2->begin) { /* Walk back over the common suffix */
-               if (*p1 != *p2) {
-                       break;
-               }
-
-               p1--;
-               p2--;
-       }
-
-       if (p2 == t2->begin) {
-               /* t2 can be a suffix of t1 on a label boundary if the byte before p1 is '.' */
-               if (p1 != t1->begin && *(p1 - 1) == '.') {
-                       return TRUE;
-               }
-       }
-       else if (p1 == t1->begin) {
-               if (p2 != t2->begin && *(p2 - 1) == '.') {
-                       return TRUE;
-               }
-       }
-
-       return FALSE;
-}
-
-static void
-rspamd_html_url_is_phished(rspamd_mempool_t *pool,
-                                                  struct rspamd_url *href_url,
-                                                  const guchar *url_text,
-                                                  gsize len,
-                                                  gboolean *url_found,
-                                                  struct rspamd_url **ptext_url) {
-       struct rspamd_url *text_url;
-       rspamd_ftok_t disp_tok, href_tok;
-       gint rc;
-       goffset url_pos;
-       gchar *url_str = NULL, *idn_hbuf;
-       const guchar *end = url_text + len, *p;
-#if U_ICU_VERSION_MAJOR_NUM >= 46
-       static UIDNA *udn;
-       UErrorCode uc_err = U_ZERO_ERROR;
-       UIDNAInfo uinfo = UIDNA_INFO_INITIALIZER;
-#endif
-
-       *url_found = FALSE;
-#if U_ICU_VERSION_MAJOR_NUM >= 46
-       if (udn == NULL) {
-               udn = uidna_openUTS46(UIDNA_DEFAULT, &uc_err);
-
-               if (uc_err != U_ZERO_ERROR) {
-                       msg_err_pool ("cannot init idna converter: %s", u_errorName(uc_err));
-               }
-       }
-#endif
-
-       while (url_text < end && g_ascii_isspace (*url_text)) {
-               url_text++;
-       }
-
-       if (end > url_text + 4 &&
-               rspamd_url_find(pool, url_text, end - url_text, &url_str,
-                               RSPAMD_URL_FIND_ALL,
-                               &url_pos, NULL) &&
-               url_str != NULL) {
-               if (url_pos > 0) {
-                       /*
-                        * We have some url at some offset, so we need to check what is
-                        * at the start of the text
-                        */
-                       p = url_text;
-
-                       while (p < url_text + url_pos) {
-                               if (!g_ascii_isspace (*p)) {
-                                       *url_found = FALSE;
-                                       return;
-                               }
-
-                               p++;
-                       }
-               }
-
-               text_url = rspamd_mempool_alloc0 (pool, sizeof(struct rspamd_url));
-               rc = rspamd_url_parse(text_url, url_str, strlen(url_str), pool,
-                               RSPAMD_URL_PARSE_TEXT);
-
-               if (rc == URI_ERRNO_OK) {
-                       disp_tok.len = text_url->hostlen;
-                       disp_tok.begin = rspamd_url_host_unsafe (text_url);
-#if U_ICU_VERSION_MAJOR_NUM >= 46
-                       if (rspamd_substring_search_caseless(rspamd_url_host_unsafe (text_url),
-                                       text_url->hostlen, "xn--", 4) != -1) {
-                               idn_hbuf = rspamd_mempool_alloc (pool, text_url->hostlen * 2 + 1);
-                               /* We need to convert it to the normal value first */
-                               disp_tok.len = uidna_nameToUnicodeUTF8(udn,
-                                               rspamd_url_host_unsafe (text_url), text_url->hostlen,
-                                               idn_hbuf, text_url->hostlen * 2 + 1, &uinfo, &uc_err);
-
-                               if (uc_err != U_ZERO_ERROR) {
-                                       msg_err_pool ("cannot convert to IDN: %s",
-                                                       u_errorName(uc_err));
-                                       disp_tok.len = text_url->hostlen;
-                               }
-                               else {
-                                       disp_tok.begin = idn_hbuf;
-                               }
-                       }
-#endif
-                       href_tok.len = href_url->hostlen;
-                       href_tok.begin = rspamd_url_host_unsafe (href_url);
-#if U_ICU_VERSION_MAJOR_NUM >= 46
-                       if (rspamd_substring_search_caseless(rspamd_url_host_unsafe (href_url),
-                                       href_url->hostlen, "xn--", 4) != -1) {
-                               idn_hbuf = rspamd_mempool_alloc (pool, href_url->hostlen * 2 + 1);
-                               /* We need to convert it to the normal value first */
-                               href_tok.len = uidna_nameToUnicodeUTF8(udn,
-                                               rspamd_url_host_unsafe (href_url), href_url->hostlen,
-                                               idn_hbuf, href_url->hostlen * 2 + 1, &uinfo, &uc_err);
-
-                               if (uc_err != U_ZERO_ERROR) {
-                                       msg_err_pool ("cannot convert to IDN: %s",
-                                                       u_errorName(uc_err));
-                                       href_tok.len = href_url->hostlen;
-                               }
-                               else {
-                                       href_tok.begin = idn_hbuf;
-                               }
-                       }
-#endif
-                       if (rspamd_ftok_casecmp(&disp_tok, &href_tok) != 0 &&
-                               text_url->tldlen > 0 && href_url->tldlen > 0) {
-
-                               /* Apply the same logic for TLD */
-                               disp_tok.len = text_url->tldlen;
-                               disp_tok.begin = rspamd_url_tld_unsafe (text_url);
-#if U_ICU_VERSION_MAJOR_NUM >= 46
-                               if (rspamd_substring_search_caseless(rspamd_url_tld_unsafe (text_url),
-                                               text_url->tldlen, "xn--", 4) != -1) {
-                                       idn_hbuf = rspamd_mempool_alloc (pool, text_url->tldlen * 2 + 1);
-                                       /* We need to convert it to the normal value first */
-                                       disp_tok.len = uidna_nameToUnicodeUTF8(udn,
-                                                       rspamd_url_tld_unsafe (text_url), text_url->tldlen,
-                                                       idn_hbuf, text_url->tldlen * 2 + 1, &uinfo, &uc_err);
-
-                                       if (uc_err != U_ZERO_ERROR) {
-                                               msg_err_pool ("cannot convert to IDN: %s",
-                                                               u_errorName(uc_err));
-                                               disp_tok.len = text_url->tldlen;
-                                       }
-                                       else {
-                                               disp_tok.begin = idn_hbuf;
-                                       }
-                               }
-#endif
-                               href_tok.len = href_url->tldlen;
-                               href_tok.begin = rspamd_url_tld_unsafe (href_url);
-#if U_ICU_VERSION_MAJOR_NUM >= 46
-                               if (rspamd_substring_search_caseless(rspamd_url_tld_unsafe (href_url),
-                                               href_url->tldlen, "xn--", 4) != -1) {
-                                       idn_hbuf = rspamd_mempool_alloc (pool, href_url->tldlen * 2 + 1);
-                                       /* We need to convert it to the normal value first */
-                                       href_tok.len = uidna_nameToUnicodeUTF8(udn,
-                                                       rspamd_url_tld_unsafe (href_url), href_url->tldlen,
-                                                       idn_hbuf, href_url->tldlen * 2 + 1, &uinfo, &uc_err);
-
-                                       if (uc_err != U_ZERO_ERROR) {
-                                               msg_err_pool ("cannot convert to IDN: %s",
-                                                               u_errorName(uc_err));
-                                               href_tok.len = href_url->tldlen;
-                                       }
-                                       else {
-                                               href_tok.begin = idn_hbuf;
-                                       }
-                               }
-#endif
-                               if (rspamd_ftok_casecmp(&disp_tok, &href_tok) != 0) {
-                                       /* Check if one url is a subdomain for another */
-
-                                       if (!rspamd_url_is_subdomain(&disp_tok, &href_tok)) {
-                                               href_url->flags |= RSPAMD_URL_FLAG_PHISHED;
-                                               href_url->linked_url = text_url;
-                                               text_url->flags |= RSPAMD_URL_FLAG_HTML_DISPLAYED;
-                                       }
-                               }
-                       }
-
-                       *ptext_url = text_url;
-                       *url_found = TRUE;
-               }
-               else {
-                       /*
-                        * We have found something that looks like an url but it was
-                        * not parsed correctly.
-                        * Sometimes it means an obfuscation attempt, so we have to check
-                        * what's inside of the text
-                        */
-                       gboolean obfuscation_found = FALSE;
-
-                       if (len > 4 && g_ascii_strncasecmp(url_text, "http", 4) == 0 &&
-                               rspamd_substring_search(url_text, len, "://", 3) != -1) {
-                               /* Clearly an obfuscation attempt */
-                               obfuscation_found = TRUE;
-                       }
-
-                       msg_info_pool ("extract of url '%s' failed: %s; obfuscation detected: %s",
-                                       url_str,
-                                       rspamd_url_strerror(rc),
-                                       obfuscation_found ? "yes" : "no");
-
-                       if (obfuscation_found) {
-                               href_url->flags |= RSPAMD_URL_FLAG_PHISHED | RSPAMD_URL_FLAG_OBSCURED;
-                       }
-               }
-       }
-
-}
-/*
- * Places `tag` into the tag tree of `hc` and updates `*cur_level`.
- *
- * On first use a root node without data is created, stored in
- * `hc->html_tags` and registered with the pool so that g_node_destroy() is
- * called for it. RSPAMD_HTML_FLAG_TOO_MANY_TAGS is raised once
- * `hc->total_tags` exceeds `max_tags`; new nodes are only created while
- * `hc->total_tags` is below `max_tags`.
- *
- * Tags with an unknown id (-1) are only counted. Tags that are neither
- * CM_INLINE nor CM_EMPTY are handled as block tags: closing/closed ones are
- * appended and paired through rspamd_html_check_balance(), which sets
- * `*balanced` and, on failure, RSPAMD_HTML_FLAG_UNBALANCED; opening ones
- * inherit FL_IGNORE from the parent and become the new current level unless
- * they are already FL_CLOSED. Note that `*balanced` is written on some paths
- * only.
- *
- * Returns FALSE for unknown tags, for a closing tag without a current level
- * and for tags that end up ignored (CM_HEAD, CM_UNKNOWN or FL_IGNORE on the
- * tag itself or, for inline tags, on the parent); TRUE otherwise.
- */
-static gboolean
-rspamd_html_process_tag(rspamd_mempool_t *pool, struct html_content *hc,
-                                               struct html_tag *tag, GNode **cur_level, gboolean *balanced) {
-       GNode *nnode;
-       struct html_tag *parent;
-
-       if (hc->html_tags == NULL) {
-               nnode = g_node_new(NULL);
-               *cur_level = nnode;
-               hc->html_tags = nnode;
-               rspamd_mempool_add_destructor (pool,
-                               (rspamd_mempool_destruct_t) g_node_destroy,
-                               nnode);
-       }
-
-       if (hc->total_tags > max_tags) {
-               hc->flags |= RSPAMD_HTML_FLAG_TOO_MANY_TAGS;
-       }
-
-       if (tag->id == -1) {
-               /* Ignore unknown tags */
-               hc->total_tags++;
-               return FALSE;
-       }
-
-       tag->parent = *cur_level;
-
-       if (!(tag->flags & (CM_INLINE | CM_EMPTY))) {
-               /* Block tag */
-               if (tag->flags & (FL_CLOSING | FL_CLOSED)) {
-                       if (!*cur_level) {
-                               msg_debug_html ("bad parent node");
-                               return FALSE;
-                       }
-
-                       if (hc->total_tags < max_tags) {
-                               nnode = g_node_new(tag);
-                               g_node_append (*cur_level, nnode);
-
-                               if (!rspamd_html_check_balance(nnode, cur_level)) {
-                                       msg_debug_html (
-                                                       "mark part as unbalanced as it has not pairable closing tags");
-                                       hc->flags |= RSPAMD_HTML_FLAG_UNBALANCED;
-                                       *balanced = FALSE;
-                               }
-                               else {
-                                       *balanced = TRUE;
-                               }
-
-                               hc->total_tags++;
-                       }
-               }
-               else {
-                       parent = (*cur_level)->data;
-
-                       if (parent) {
-                               if ((parent->flags & FL_IGNORE)) {
-                                       tag->flags |= FL_IGNORE;
-                               }
-
-                               if (!(tag->flags & FL_CLOSED) &&
-                                       !(parent->flags & FL_BLOCK)) {
-                                       /* We likely have some bad nesting */
-                                       if (parent->id == tag->id) {
-                                               /* Something like <a>bla<a>foo... */
-                                               hc->flags |= RSPAMD_HTML_FLAG_UNBALANCED;
-                                               *balanced = FALSE;
-                                               tag->parent = parent->parent; /* Attach as a sibling of the same-id parent */
-
-                                               if (hc->total_tags < max_tags) {
-                                                       nnode = g_node_new(tag);
-                                                       g_node_append (parent->parent, nnode);
-                                                       *cur_level = nnode;
-                                                       hc->total_tags++;
-                                               }
-
-                                               return TRUE;
-                                       }
-                               }
-                       }
-
-                       if (hc->total_tags < max_tags) {
-                               nnode = g_node_new(tag);
-                               g_node_append (*cur_level, nnode);
-
-                               if ((tag->flags & FL_CLOSED) == 0) {
-                                       *cur_level = nnode; /* Descend: following tags become children of this one */
-                               }
-
-                               hc->total_tags++;
-                       }
-
-                       if (tag->flags & (CM_HEAD | CM_UNKNOWN | FL_IGNORE)) {
-                               tag->flags |= FL_IGNORE;
-
-                               return FALSE;
-                       }
-
-               }
-       }
-       else {
-               /* Inline tag */
-               parent = (*cur_level)->data;
-
-               if (parent) {
-                       if (hc->total_tags < max_tags) {
-                               nnode = g_node_new(tag);
-                               g_node_append (*cur_level, nnode);
-
-                               hc->total_tags++;
-                       }
-                       if ((parent->flags & (CM_HEAD | CM_UNKNOWN | FL_IGNORE))) {
-                               tag->flags |= FL_IGNORE;
-
-                               return FALSE;
-                       }
-               }
-       }
-
-       return TRUE;
-}
-
-/*
- * Helper for rspamd_html_parse_tag_component: allocates a component of type
- * comp_type from `pool`, leaves its value empty (start = NULL, len = 0),
- * appends it to the tail of tag->params and sets `ret` to TRUE.
- *
- * Not hygienic: it expands in place and relies on `comp`, `pool`, `tag` and
- * `ret` being declared in the calling scope.
- */
-#define NEW_COMPONENT(comp_type) do {                            \
-    comp = rspamd_mempool_alloc (pool, sizeof (*comp));            \
-    comp->type = (comp_type);                                    \
-    comp->start = NULL;                                            \
-    comp->len = 0;                                                \
-    g_queue_push_tail (tag->params, comp);                        \
-    ret = TRUE;                                                    \
-} while(0)
-
-/*
- * Classify an attribute name and register it on the tag if it is tracked.
- *
- * The bytes in [begin, end) are copied into pool memory and HTML entities are
- * decoded in the copy before the case-insensitive comparison. When the name
- * is recognised, NEW_COMPONENT appends a component of the matching type to
- * tag->params with an empty value (start = NULL, len = 0).
- *
- * `src`, `href` and `action` are all stored as RSPAMD_HTML_COMPONENT_HREF;
- * `rel` and `alt` are accepted for any tag. The remaining attributes depend
- * on the tag: width/height/style for Tag_IMG, color/style/class/bgcolor/size
- * for Tag_FONT, and color/style/class/bgcolor for tags flagged FL_BLOCK.
- *
- * Returns TRUE if a component was appended, FALSE for an empty range or an
- * attribute name that is not tracked.
- */
-static gboolean
-rspamd_html_parse_tag_component(rspamd_mempool_t *pool,
-                                                               const guchar *begin, const guchar *end,
-                                                               struct html_tag *tag) {
-       struct html_tag_component *comp;
-       gint len;
-       gboolean ret = FALSE;
-       gchar *p;
-
-       if (end <= begin) {
-               return FALSE;
-       }
-
-       /* Work on a private copy: entity decoding rewrites the buffer in place */
-       p = rspamd_mempool_alloc (pool, end - begin);
-       memcpy(p, begin, end - begin);
-       len = rspamd_html_decode_entitles_inplace(p, end - begin);
-
-       /* Attributes that are tracked regardless of the tag */
-       if (len == 3) {
-               if (g_ascii_strncasecmp(p, "src", len) == 0) {
-                       NEW_COMPONENT (RSPAMD_HTML_COMPONENT_HREF);
-               }
-               else if (g_ascii_strncasecmp(p, "rel", len) == 0) {
-                       NEW_COMPONENT (RSPAMD_HTML_COMPONENT_REL);
-               }
-               else if (g_ascii_strncasecmp(p, "alt", len) == 0) {
-                       NEW_COMPONENT (RSPAMD_HTML_COMPONENT_ALT);
-               }
-       }
-       else if (len == 4) {
-               if (g_ascii_strncasecmp(p, "href", len) == 0) {
-                       NEW_COMPONENT (RSPAMD_HTML_COMPONENT_HREF);
-               }
-       }
-       else if (len == 6) {
-               if (g_ascii_strncasecmp(p, "action", len) == 0) {
-                       NEW_COMPONENT (RSPAMD_HTML_COMPONENT_HREF);
-               }
-       }
-
-       /* Tag specific attributes */
-       if (tag->id == Tag_IMG) {
-               /* Check width and height if presented */
-               if (len == 5 && g_ascii_strncasecmp(p, "width", len) == 0) {
-                       NEW_COMPONENT (RSPAMD_HTML_COMPONENT_WIDTH);
-               }
-               else if (len == 6 && g_ascii_strncasecmp(p, "height", len) == 0) {
-                       NEW_COMPONENT (RSPAMD_HTML_COMPONENT_HEIGHT);
-               }
-               /*
-                * The length must be checked here as well: comparing only `len`
-                * bytes would otherwise accept any prefix of "style" (including
-                * an empty name) as a style attribute.
-                */
-               else if (len == 5 && g_ascii_strncasecmp(p, "style", len) == 0) {
-                       NEW_COMPONENT (RSPAMD_HTML_COMPONENT_STYLE);
-               }
-       }
-       else if (tag->id == Tag_FONT) {
-               if (len == 5) {
-                       if (g_ascii_strncasecmp(p, "color", len) == 0) {
-                               NEW_COMPONENT (RSPAMD_HTML_COMPONENT_COLOR);
-                       }
-                       else if (g_ascii_strncasecmp(p, "style", len) == 0) {
-                               NEW_COMPONENT (RSPAMD_HTML_COMPONENT_STYLE);
-                       }
-                       else if (g_ascii_strncasecmp(p, "class", len) == 0) {
-                               NEW_COMPONENT (RSPAMD_HTML_COMPONENT_CLASS);
-                       }
-               }
-               else if (len == 7) {
-                       if (g_ascii_strncasecmp(p, "bgcolor", len) == 0) {
-                               NEW_COMPONENT (RSPAMD_HTML_COMPONENT_BGCOLOR);
-                       }
-               }
-               else if (len == 4) {
-                       if (g_ascii_strncasecmp(p, "size", len) == 0) {
-                               NEW_COMPONENT (RSPAMD_HTML_COMPONENT_SIZE);
-                       }
-               }
-       }
-       else if (tag->flags & FL_BLOCK) {
-               if (len == 5) {
-                       if (g_ascii_strncasecmp(p, "color", len) == 0) {
-                               NEW_COMPONENT (RSPAMD_HTML_COMPONENT_COLOR);
-                       }
-                       else if (g_ascii_strncasecmp(p, "style", len) == 0) {
-                               NEW_COMPONENT (RSPAMD_HTML_COMPONENT_STYLE);
-                       }
-                       else if (g_ascii_strncasecmp(p, "class", len) == 0) {
-                               NEW_COMPONENT (RSPAMD_HTML_COMPONENT_CLASS);
-                       }
-               }
-               else if (len == 7) {
-                       if (g_ascii_strncasecmp(p, "bgcolor", len) == 0) {
-                               NEW_COMPONENT (RSPAMD_HTML_COMPONENT_BGCOLOR);
-                       }
-               }
-       }
-
-       return ret;
-}
-
-/*
- * Finish the attribute value that spans [*savep, in): copy it into pool
- * memory, decode HTML entities in the copy and attach the result to the
- * component at the tail of tag->params. *savep is reset to NULL afterwards.
- *
- * Does nothing when *savep is NULL, i.e. when the current attribute is not
- * being recorded.
- */
-static void
-rspamd_html_store_attr_value(rspamd_mempool_t *pool, struct html_tag *tag,
-                                                        const guchar *in, guchar const **savep) {
-       struct html_tag_component *comp;
-       gchar *s;
-
-       if (*savep == NULL) {
-               return;
-       }
-
-       g_assert (tag->params != NULL);
-       comp = g_queue_peek_tail(tag->params);
-       g_assert (comp != NULL);
-       comp->len = in - *savep;
-       s = rspamd_mempool_alloc (pool, comp->len);
-       memcpy(s, *savep, comp->len);
-       comp->len = rspamd_html_decode_entitles_inplace(s, comp->len);
-       comp->start = s;
-       *savep = NULL;
-}
-
-/*
- * One step of the tag content automaton: consumes the single byte at `in`.
- *
- * pool   - memory pool used for the decoded tag name and attribute values
- * hc     - HTML content; only its flags are updated here
- * tag    - tag being filled in (name, id, flags, params)
- * in     - current input byte
- * statep - automaton state, read on entry and written back on exit
- * savep  - start of the attribute name or value currently being collected;
- *          *savep == NULL means the current attribute is not recorded
- *
- * NOTE(review): the parse_value, parse_end_*quote and spaces_after_param
- * states peek at in[1]. No end pointer is passed in, so the caller presumably
- * guarantees one readable byte after `in` - confirm.
- */
-static inline void
-rspamd_html_parse_tag_content(rspamd_mempool_t *pool,
-                                                         struct html_content *hc, struct html_tag *tag, const guchar *in,
-                                                         gint *statep, guchar const **savep) {
-       enum {
-               parse_start = 0,
-               parse_name,
-               parse_attr_name,
-               parse_equal,
-               parse_start_dquote,
-               parse_dqvalue,
-               parse_end_dquote,
-               parse_start_squote,
-               parse_sqvalue,
-               parse_end_squote,
-               parse_value,
-               spaces_after_name,
-               spaces_before_eq,
-               spaces_after_eq,
-               spaces_after_param,
-               ignore_bad_tag
-       } state;
-       struct html_tag_def *found;
-
-       state = *statep;
-
-       switch (state) {
-       case parse_start:
-               if (!g_ascii_isalpha (*in) && !g_ascii_isspace (*in)) {
-                       hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS;
-                       state = ignore_bad_tag;
-                       tag->id = -1;
-                       tag->flags |= FL_BROKEN;
-               }
-               else if (g_ascii_isalpha (*in)) {
-                       state = parse_name;
-                       tag->name.start = in;
-               }
-               break;
-
-       case parse_name:
-               if (g_ascii_isspace (*in) || *in == '>' || *in == '/') {
-                       g_assert (in >= tag->name.start);
-
-                       if (*in == '/') {
-                               tag->flags |= FL_CLOSED;
-                       }
-
-                       tag->name.len = in - tag->name.start;
-
-                       if (tag->name.len == 0) {
-                               hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS;
-                               tag->id = -1;
-                               tag->flags |= FL_BROKEN;
-                               state = ignore_bad_tag;
-                       }
-                       else {
-                               gchar *s;
-                               khiter_t k;
-                               /* We CANNOT safely modify tag's name here, as it is already parsed */
-
-                               s = rspamd_mempool_alloc (pool, tag->name.len + 1);
-                               memcpy(s, tag->name.start, tag->name.len);
-                               tag->name.len = rspamd_html_decode_entitles_inplace(s,
-                                               tag->name.len);
-                               tag->name.start = s;
-                               tag->name.len = rspamd_str_lc_utf8(s, tag->name.len);
-                               s[tag->name.len] = '\0';
-
-                               k = kh_get (tag_by_name, html_tag_by_name, s);
-
-                               if (k == kh_end (html_tag_by_name)) {
-                                       hc->flags |= RSPAMD_HTML_FLAG_UNKNOWN_ELEMENTS;
-                                       tag->id = -1;
-                               }
-                               else {
-                                       found = &kh_val (html_tag_by_name, k);
-                                       tag->id = found->id;
-                                       /*
-                                        * NOTE(review): this assignment discards the FL_CLOSED
-                                        * bit set above for `<name/`, while `<name /` keeps it
-                                        * (see spaces_after_name). Behaviour is preserved here;
-                                        * confirm whether that difference is intended.
-                                        */
-                                       tag->flags = found->flags;
-                               }
-
-                               state = spaces_after_name;
-                       }
-               }
-               break;
-
-       case parse_attr_name:
-               if (*savep == NULL) {
-                       state = ignore_bad_tag;
-               }
-               else {
-                       const guchar *attr_name_end = in;
-
-                       if (*in == '=') {
-                               state = parse_equal;
-                       }
-                       else if (*in == '"') {
-                               /* No equal or something sane but we have quote character */
-                               state = parse_start_dquote;
-                               attr_name_end = in - 1;
-
-                               /* Step back over trailing non-alphanumeric bytes */
-                               while (attr_name_end > *savep &&
-                                               !g_ascii_isalnum (*attr_name_end)) {
-                                       attr_name_end--;
-                               }
-
-                               /* One character forward to obtain length */
-                               attr_name_end++;
-                       }
-                       else if (g_ascii_isspace (*in)) {
-                               state = spaces_before_eq;
-                       }
-                       else if (*in == '/') {
-                               tag->flags |= FL_CLOSED;
-                       }
-                       else if (!g_ascii_isgraph (*in)) {
-                               state = parse_value;
-                               attr_name_end = in - 1;
-
-                               /* Step back over trailing non-alphanumeric bytes */
-                               while (attr_name_end > *savep &&
-                                               !g_ascii_isalnum (*attr_name_end)) {
-                                       attr_name_end--;
-                               }
-
-                               /* One character forward to obtain length */
-                               attr_name_end++;
-                       }
-                       else {
-                               /* Still inside the attribute name */
-                               return;
-                       }
-
-                       if (!rspamd_html_parse_tag_component(pool, *savep, attr_name_end, tag)) {
-                               /* Ignore unknown params */
-                               *savep = NULL;
-                       }
-                       else if (state == parse_value) {
-                               *savep = in + 1;
-                       }
-               }
-
-               break;
-
-       case spaces_after_name:
-               if (!g_ascii_isspace (*in)) {
-                       *savep = in;
-                       if (*in == '/') {
-                               tag->flags |= FL_CLOSED;
-                       }
-                       else if (*in != '>') {
-                               state = parse_attr_name;
-                       }
-               }
-               break;
-
-       case spaces_before_eq:
-               if (*in == '=') {
-                       state = parse_equal;
-               }
-               else if (!g_ascii_isspace (*in)) {
-                       /*
-                        * HTML defines that crap could still be restored and
-                        * calculated somehow... So we have to follow this stupid behaviour
-                        */
-                       /*
-                        * TODO: estimate what insane things do email clients in each case
-                        */
-                       if (*in == '>') {
-                               /*
-                                * Attribute name followed by end of tag
-                                * Should be okay (empty attribute). The rest is handled outside
-                                * this automata.
-                                */
-
-                       }
-                       else if (*in == '"' || *in == '\'') {
-                               /* Attribute followed by quote... Missing '=' ? Dunno, need to test */
-                               hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS;
-                               tag->flags |= FL_BROKEN;
-                               state = ignore_bad_tag;
-                       }
-                       else {
-                               /*
-                                * Just start another attribute ignoring an empty attributes for
-                                * now. We don't use them in fact...
-                                */
-                               state = parse_attr_name;
-                               *savep = in;
-                       }
-               }
-               break;
-
-       case spaces_after_eq:
-               if (*in == '"') {
-                       state = parse_start_dquote;
-               }
-               else if (*in == '\'') {
-                       state = parse_start_squote;
-               }
-               else if (!g_ascii_isspace (*in)) {
-                       if (*savep != NULL) {
-                               /* We need to save this param */
-                               *savep = in;
-                       }
-                       state = parse_value;
-               }
-               break;
-
-       case parse_equal:
-               if (g_ascii_isspace (*in)) {
-                       state = spaces_after_eq;
-               }
-               else if (*in == '"') {
-                       state = parse_start_dquote;
-               }
-               else if (*in == '\'') {
-                       state = parse_start_squote;
-               }
-               else {
-                       if (*savep != NULL) {
-                               /* We need to save this param */
-                               *savep = in;
-                       }
-                       state = parse_value;
-               }
-               break;
-
-       case parse_start_dquote:
-               if (*in == '"') {
-                       /*
-                        * We have an empty attribute value: stop recording. This must
-                        * clear the caller's pointer; assigning to `savep` itself would
-                        * only change the local parameter.
-                        */
-                       *savep = NULL;
-                       state = spaces_after_param;
-               }
-               else {
-                       if (*savep != NULL) {
-                               /* We need to save this param */
-                               *savep = in;
-                       }
-                       state = parse_dqvalue;
-               }
-               break;
-
-       case parse_start_squote:
-               if (*in == '\'') {
-                       /* Empty attribute value, same handling as for double quotes */
-                       *savep = NULL;
-                       state = spaces_after_param;
-               }
-               else {
-                       if (*savep != NULL) {
-                               /* We need to save this param */
-                               *savep = in;
-                       }
-                       state = parse_sqvalue;
-               }
-               break;
-
-       case parse_dqvalue:
-               if (*in == '"') {
-                       state = parse_end_dquote;
-                       rspamd_html_store_attr_value(pool, tag, in, savep);
-               }
-               break;
-
-       case parse_sqvalue:
-               if (*in == '\'') {
-                       state = parse_end_squote;
-                       rspamd_html_store_attr_value(pool, tag, in, savep);
-               }
-               break;
-
-       case parse_value:
-               if (*in == '/' && *(in + 1) == '>') {
-                       /* Unquoted value terminated by `/>`: the state is left unchanged */
-                       tag->flags |= FL_CLOSED;
-                       rspamd_html_store_attr_value(pool, tag, in, savep);
-               }
-               else if (g_ascii_isspace (*in) || *in == '>' || *in == '"') {
-                       state = spaces_after_param;
-                       rspamd_html_store_attr_value(pool, tag, in, savep);
-               }
-               break;
-
-       case parse_end_dquote:
-       case parse_end_squote:
-               if (g_ascii_isspace (*in)) {
-                       state = spaces_after_param;
-               }
-               else if (*in == '/' && *(in + 1) == '>') {
-                       tag->flags |= FL_CLOSED;
-               }
-               else {
-                       /* No space, proceed immediately to the attribute name */
-                       state = parse_attr_name;
-                       *savep = in;
-               }
-               break;
-
-       case spaces_after_param:
-               if (!g_ascii_isspace (*in)) {
-                       if (*in == '/' && *(in + 1) == '>') {
-                               tag->flags |= FL_CLOSED;
-                       }
-
-                       state = parse_attr_name;
-                       *savep = in;
-               }
-               break;
-
-       case ignore_bad_tag:
-               break;
-       }
-
-       *statep = state;
-}
-
-
-/*
- * Build a struct rspamd_url from the raw value of an HTML URL attribute.
- *
- * The input [start, start + len) is trimmed of leading and trailing ASCII
- * whitespace, internal whitespace is dropped, other non-graphic ASCII bytes
- * are percent-encoded and, when the value has no "://" and is not one of the
- * known prefixes, a scheme prefix is prepended. The cleaned copy lives in
- * `pool` and is handed to rspamd_url_parse.
- *
- * If `comp` is not NULL it is first pointed at the trimmed input and, when
- * parsing succeeds, at the parsed url string.
- *
- * Returns the parsed url, or NULL when the value has no usable data, parsing
- * fails, the host is empty, the protocol is unknown, or the url has neither
- * a schema nor a TLD.
- */
-struct rspamd_url *
-rspamd_html_process_url(rspamd_mempool_t *pool, const gchar *start, guint len,
-                                               struct html_tag_component *comp) {
-       struct rspamd_url *url;
-       guint saved_flags = 0;
-       gchar *decoded;
-       gint rc;
-       gsize decoded_len;
-       const gchar *p, *s, *prefix = "http://";
-       gchar *d;
-       guint i;
-       gsize dlen;
-       gboolean has_bad_chars = FALSE, no_prefix = FALSE;
-       static const gchar hexdigests[16] = "0123456789abcdef";
-
-       p = start;
-
-       /* Strip spaces from the url */
-       /* Head spaces */
-       while (p < start + len && g_ascii_isspace (*p)) {
-               p++;
-               start++;
-               len--;
-       }
-
-       if (comp) {
-               comp->start = p;
-               comp->len = len;
-       }
-
-       /* Trailing spaces */
-       p = start + len - 1;
-
-       while (p >= start && g_ascii_isspace (*p)) {
-               p--;
-               len--;
-
-               if (comp) {
-                       comp->len--;
-               }
-       }
-
-       s = start;
-       dlen = 0;
-
-       /*
-        * Upper bound for the output size: 3 bytes for every ASCII byte that
-        * is not graphic (it may become %XX), 1 byte for everything else
-        */
-       for (i = 0; i < len; i++) {
-               if (G_UNLIKELY (((guint) s[i]) < 0x80 && !g_ascii_isgraph(s[i]))) {
-                       dlen += 3;
-               }
-               else {
-                       dlen++;
-               }
-       }
-
-       if (rspamd_substring_search(start, len, "://", 3) == -1) {
-               /*
-                * Note that the length check uses sizeof("mailto:") for all three
-                * prefixes, so it requires at least 8 bytes of input
-                */
-               if (len >= sizeof("mailto:") &&
-                       (memcmp(start, "mailto:", sizeof("mailto:") - 1) == 0 ||
-                        memcmp(start, "tel:", sizeof("tel:") - 1) == 0 ||
-                        memcmp(start, "callto:", sizeof("callto:") - 1) == 0)) {
-                       /* Exclusion, has valid but 'strange' prefix */
-               }
-               else {
-                       /*
-                        * Inspect the first byte that is neither alphanumeric nor has
-                        * the high bit set; it decides which prefix (if any) is added
-                        */
-                       for (i = 0; i < len; i++) {
-                               if (!((s[i] & 0x80) || g_ascii_isalnum (s[i]))) {
-                                       if (i == 0 && len > 2 && s[i] == '/' && s[i + 1] == '/') {
-                                               /* Value starts with `//`: only the scheme name is missing */
-                                               prefix = "http:";
-                                               dlen += sizeof("http:") - 1;
-                                               no_prefix = TRUE;
-                                       }
-                                       else if (s[i] == '@') {
-                                               /* Likely email prefix */
-                                               prefix = "mailto://";
-                                               dlen += sizeof("mailto://") - 1;
-                                               no_prefix = TRUE;
-                                       }
-                                       else if (s[i] == ':' && i != 0) {
-                                               /* Special case */
-                                               no_prefix = FALSE;
-                                       }
-                                       else {
-                                               if (i == 0) {
-                                                       /* No valid data */
-                                                       return NULL;
-                                               }
-                                               else {
-                                                       /* Default prefix ("http://") is used */
-                                                       no_prefix = TRUE;
-                                                       dlen += strlen(prefix);
-                                               }
-                                       }
-
-                                       break;
-                               }
-                       }
-               }
-       }
-
-       /* One extra byte for the terminating zero written below */
-       decoded = rspamd_mempool_alloc (pool, dlen + 1);
-       d = decoded;
-
-       if (no_prefix) {
-               gsize plen = strlen(prefix);
-               memcpy(d, prefix, plen);
-               d += plen;
-       }
-
-       /*
-        * We also need to remove all internal newlines, spaces
-        * and encode unsafe characters
-        */
-       for (i = 0; i < len; i++) {
-               if (G_UNLIKELY (g_ascii_isspace(s[i]))) {
-                       continue;
-               }
-               else if (G_UNLIKELY (((guint) s[i]) < 0x80 && !g_ascii_isgraph(s[i]))) {
-                       /* URL encode */
-                       *d++ = '%';
-                       *d++ = hexdigests[(s[i] >> 4) & 0xf];
-                       *d++ = hexdigests[s[i] & 0xf];
-                       has_bad_chars = TRUE;
-               }
-               else {
-                       *d++ = s[i];
-               }
-       }
-
-       *d = '\0';
-       /* From here on dlen is the real length of the cleaned url */
-       dlen = d - decoded;
-
-       url = rspamd_mempool_alloc0 (pool, sizeof(*url));
-
-       /*
-        * NOTE(review): saved_flags is OR-ed into url->flags below, so this call
-        * presumably updates it (it would have to be a macro to do so) - confirm
-        */
-       rspamd_url_normalise_propagate_flags (pool, decoded, &dlen, saved_flags);
-
-       rc = rspamd_url_parse(url, decoded, dlen, pool, RSPAMD_URL_PARSE_HREF);
-
-       /* Filter some completely damaged urls */
-       if (rc == URI_ERRNO_OK && url->hostlen > 0 &&
-               !((url->protocol & PROTOCOL_UNKNOWN))) {
-               url->flags |= saved_flags;
-
-               if (has_bad_chars) {
-                       url->flags |= RSPAMD_URL_FLAG_OBSCURED;
-               }
-
-               if (no_prefix) {
-                       url->flags |= RSPAMD_URL_FLAG_SCHEMALESS;
-
-                       if (url->tldlen == 0 || (url->flags & RSPAMD_URL_FLAG_NO_TLD)) {
-                               /* Ignore urls with both no schema and no tld */
-                               return NULL;
-                       }
-               }
-
-               decoded = url->string;
-               decoded_len = url->urllen;
-
-               /* Expose the parsed form of the url through the component */
-               if (comp) {
-                       comp->start = decoded;
-                       comp->len = decoded_len;
-               }
-               /* Spaces in href usually mean an attempt to obfuscate URL */
-               /* See https://github.com/vstakhov/rspamd/issues/593 */
-#if 0
-               if (has_spaces) {
-                       url->flags |= RSPAMD_URL_FLAG_OBSCURED;
-               }
-#endif
-
-               return url;
-       }
-
-       return NULL;
-}
-
-/*
- * Extract the url of a tag from its first non-empty HREF component.
- *
- * When `hc` carries a base url and the value is longer than two bytes, the
- * value is rebuilt in pool memory before parsing:
- *  - values without "://" are appended to the whole base url (a '/' is
- *    inserted when the base url has no data part); `data:` values are
- *    rejected at this point and NULL is returned;
- *  - values that contain "://" but start with a single '/' are treated as
- *    relative to the base url host.
- *
- * The parsed url is stored in tag->extra if that slot is still empty.
- *
- * Returns the url produced by rspamd_html_process_url for that component, or
- * NULL if the tag has no suitable component or the value is not a valid url.
- */
-static struct rspamd_url *
-rspamd_html_process_url_tag(rspamd_mempool_t *pool, struct html_tag *tag,
-                                                       struct html_content *hc) {
-       struct html_tag_component *comp;
-       GList *cur;
-       struct rspamd_url *url;
-       const gchar *start;
-       gsize len;
-
-       cur = tag->params->head;
-
-       while (cur) {
-               comp = cur->data;
-
-               if (comp->type == RSPAMD_HTML_COMPONENT_HREF && comp->len > 0) {
-                       start = comp->start;
-                       len = comp->len;
-
-                       /* Check base url */
-                       if (hc && hc->base_url && comp->len > 2) {
-                               /*
-                                * Relative url cannot start from the following:
-                                * schema://
-                                * data:
-                                * slash
-                                */
-                               gchar *buf;
-                               gsize orig_len;
-
-                               if (rspamd_substring_search(start, len, "://", 3) == -1) {
-
-                                       if (len >= sizeof("data:") &&
-                                               g_ascii_strncasecmp(start, "data:", sizeof("data:") - 1) == 0) {
-                                               /* Image data url, never insert as url */
-                                               return NULL;
-                                       }
-
-                                       /* Assume relative url */
-
-                                       gboolean need_slash = FALSE;
-
-                                       orig_len = len;
-                                       len += hc->base_url->urllen;
-
-                                       if (hc->base_url->datalen == 0) {
-                                               need_slash = TRUE;
-                                               len++;
-                                       }
-
-                                       buf = rspamd_mempool_alloc (pool, len + 1);
-                                       rspamd_snprintf(buf, len + 1, "%*s%s%*s",
-                                                       hc->base_url->urllen, hc->base_url->string,
-                                                       need_slash ? "/" : "",
-                                                       (gint) orig_len, start);
-                                       start = buf;
-                               }
-                               else if (start[0] == '/' && start[1] != '/') {
-                                       /* Relative to the hostname */
-                                       orig_len = len;
-                                       len += hc->base_url->hostlen + hc->base_url->protocollen +
-                                                  3 /* for :// */;
-                                       buf = rspamd_mempool_alloc (pool, len + 1);
-                                       /*
-                                        * The value already starts with '/', so no separator is
-                                        * emitted between host and path: an extra one would both
-                                        * double the slash and need one byte more than `len`.
-                                        */
-                                       rspamd_snprintf(buf, len + 1, "%*s://%*s%*s",
-                                                       hc->base_url->protocollen, hc->base_url->string,
-                                                       hc->base_url->hostlen, rspamd_url_host_unsafe (hc->base_url),
-                                                       (gint) orig_len, start);
-                                       start = buf;
-                               }
-                       }
-
-                       url = rspamd_html_process_url(pool, start, len, comp);
-
-                       if (url && tag->extra == NULL) {
-                               tag->extra = url;
-                       }
-
-                       /* Only the first HREF component of a tag is considered */
-                       return url;
-               }
-
-               cur = g_list_next (cur);
-       }
-
-       return NULL;
-}
-
-/* User data handed to rspamd_html_url_query_callback for each url found in a query string */
-struct rspamd_html_url_query_cbd {
-       rspamd_mempool_t *pool; /* pool made available to the callback */
-       khash_t (rspamd_url_hash) *url_set; /* set that urls found in the query are added to */
-       struct rspamd_url *url; /* url whose query string is being scanned */
-       GPtrArray *part_urls; /* optional (may be NULL): receives urls newly added to url_set */
-};
-
-/*
- * Invoked for every url discovered inside the query string of another url.
- *
- * `ud` is a struct rspamd_html_url_query_cbd. Mailto urls without a user
- * part are rejected (FALSE is returned). Any other url is flagged with
- * RSPAMD_URL_FLAG_QUERY and added to the url set; if that call reports
- * success and a part_urls array was supplied, the url is appended to it as
- * well. start_offset and end_offset are not used.
- */
-static gboolean
-rspamd_html_url_query_callback(struct rspamd_url *url, gsize start_offset,
-                                                          gsize end_offset, gpointer ud) {
-       struct rspamd_html_url_query_cbd *query_ctx =
-                       (struct rspamd_html_url_query_cbd *) ud;
-       /* Keep the name `pool`: presumably msg_debug_html refers to it implicitly */
-       rspamd_mempool_t *pool = query_ctx->pool;
-
-       if (url->protocol == PROTOCOL_MAILTO && url->userlen == 0) {
-               return FALSE;
-       }
-
-       msg_debug_html ("found url %s in query of url"
-                                       " %*s", url->string,
-                       query_ctx->url->querylen, rspamd_url_query_unsafe(query_ctx->url));
-
-       url->flags |= RSPAMD_URL_FLAG_QUERY;
-
-       if (rspamd_url_set_add_or_increase(query_ctx->url_set, url, false)) {
-               if (query_ctx->part_urls) {
-                       g_ptr_array_add(query_ctx->part_urls, url);
-               }
-       }
-
-       return TRUE;
-}
-
-/*
- * Post-process a url taken from an HTML tag: if it has a query string, scan
- * the query for nested urls (handled by rspamd_html_url_query_callback), and
- * then append the url itself to part_urls when that array is provided.
- */
-static void
-rspamd_process_html_url(rspamd_mempool_t *pool, struct rspamd_url *url,
-                                               khash_t (rspamd_url_hash) *url_set,
-                                               GPtrArray *part_urls) {
-       struct rspamd_html_url_query_cbd query_ctx;
-
-       if (url->querylen > 0) {
-               /* Context shared with the callback for every nested url */
-               query_ctx.part_urls = part_urls;
-               query_ctx.url = url;
-               query_ctx.url_set = url_set;
-               query_ctx.pool = pool;
-
-               rspamd_url_find_multiple(pool,
-                               rspamd_url_query_unsafe (url), url->querylen,
-                               RSPAMD_URL_FIND_ALL, NULL,
-                               rspamd_html_url_query_callback, &query_ctx);
-       }
-
-       if (part_urls != NULL) {
-               g_ptr_array_add(part_urls, url);
-       }
-}
-
-/*
- * Performs a very basic processing of a `data:` URL stored in `src`:
- * looks for a `;base64,` marker, decodes everything that follows it and
- * tries to detect an image in the decoded bytes. When an image is detected
- * it is stored in `img->embedded_image`.
- *
- * Only base64 encoded payloads are handled and the declared content type is
- * ignored. Parameters that precede the marker are skipped, so both
- * `data:image/xxx;base64,yyyzzz==` and
- * `data:image/xxx;charset=utf-8;base64,yyyzzz==` are processed.
- */
-static void
-rspamd_html_process_data_image(rspamd_mempool_t *pool,
-                                                          struct html_image *img,
-                                                          struct html_tag_component *src) {
-       struct rspamd_image *parsed_image;
-       const gchar *semicolon_pos = NULL, *end = src->start + src->len;
-
-       semicolon_pos = src->start;
-
-       while ((semicolon_pos = memchr(semicolon_pos, ';', end - semicolon_pos)) != NULL) {
-               if ((gsize) (end - semicolon_pos) <= sizeof("base64,")) {
-                       /* No room left for `;base64,` followed by at least one byte */
-                       return;
-               }
-
-               if (memcmp(semicolon_pos + 1, "base64,", sizeof("base64,") - 1) == 0) {
-                       /*
-                        * sizeof() includes the trailing NUL of the literal; here that
-                        * extra byte accounts for the ';' we are standing on.
-                        */
-                       const gchar *data_pos = semicolon_pos + sizeof("base64,");
-                       gchar *decoded;
-                       gsize encoded_len = end - data_pos, decoded_len;
-                       rspamd_ftok_t inp;
-
-                       /* Upper bound for the decoded size plus some slack */
-                       decoded_len = (encoded_len / 4 * 3) + 12;
-                       decoded = rspamd_mempool_alloc (pool, decoded_len);
-                       /*
-                        * NOTE(review): the decoder result is not checked, so whatever
-                        * it leaves in `decoded`/`decoded_len` is passed on - confirm
-                        * this leniency is intended.
-                        */
-                       rspamd_cryptobox_base64_decode(data_pos, encoded_len,
-                                       decoded, &decoded_len);
-                       inp.begin = decoded;
-                       inp.len = decoded_len;
-
-                       parsed_image = rspamd_maybe_process_image(pool, &inp);
-
-                       if (parsed_image) {
-                               msg_debug_html ("detected %s image of size %ud x %ud in data url",
-                                               rspamd_image_type_str(parsed_image->type),
-                                               parsed_image->width, parsed_image->height);
-                               img->embedded_image = parsed_image;
-                       }
-
-                       return;
-               }
-
-               /* Some other parameter (e.g. `;charset=...`): try the next one */
-               semicolon_pos++;
-       }
-}
-
-/*
- * Scans the inline style value held by `comp` for the property `name`
- * (case-insensitive substring match) and parses the number that follows it.
- * Spaces, '=' and ':' are allowed between the name and the number; any other
- * character stops the scan.
- *
- * Returns TRUE when a digit was found, in which case the number has been
- * handed to rspamd_strtoul with `value` as the output. Returns FALSE
- * otherwise, leaving `value` untouched.
- */
-static gboolean
-rspamd_html_style_find_dimension(struct html_tag_component *comp,
-                                                                const gchar *name, gsize namelen,
-                                                                gulong *value)
-{
-       const guchar *p, *end;
-       goffset pos;
-
-       pos = rspamd_substring_search_caseless(comp->start, comp->len,
-                       name, namelen);
-
-       if (pos == -1) {
-               return FALSE;
-       }
-
-       end = comp->start + comp->len;
-
-       for (p = comp->start + pos + namelen; p < end; p++) {
-               if (g_ascii_isdigit (*p)) {
-                       rspamd_strtoul(p, comp->len - (p - comp->start), value);
-                       return TRUE;
-               }
-
-               if (!g_ascii_isspace (*p) && *p != '=' && *p != ':') {
-                       /* Unexpected character between the name and the number */
-                       break;
-               }
-       }
-
-       return FALSE;
-}
-
-/*
- * Builds a `struct html_image` for `tag` from its parsed attributes, appends
- * it to `hc->images` (created on first use) and links it via `tag->extra`.
- *
- * Attribute handling:
- *  - href/src: `cid:` marks the image as embedded, `data:` is additionally
- *    decoded, anything else is treated as an external URL which is added to
- *    `url_set` (and to `part_urls` when it is new and the array is given);
- *  - height/width attributes set the dimensions;
- *  - style: used for a dimension only if the matching attribute has not been
- *    seen before it;
- *  - alt: appended to `dest`, separated by spaces, when `dest` is not NULL.
- *
- * Dimensions of a decoded `data:` image are used for every dimension that
- * had no explicit attribute.
- */
-static void
-rspamd_html_process_img_tag(rspamd_mempool_t *pool, struct html_tag *tag,
-                                                       struct html_content *hc, khash_t (rspamd_url_hash) *url_set,
-                                                       GPtrArray *part_urls,
-                                                       GByteArray *dest) {
-       struct html_tag_component *comp;
-       struct html_image *img;
-       rspamd_ftok_t fstr;
-       GList *cur;
-       /* Initialised as the results of rspamd_strtoul below are not checked */
-       gulong val = 0;
-       gboolean seen_width = FALSE, seen_height = FALSE;
-
-       cur = tag->params->head;
-       img = rspamd_mempool_alloc0 (pool, sizeof(*img));
-       img->tag = tag;
-       tag->flags |= FL_IMAGE;
-
-       while (cur) {
-               comp = cur->data;
-
-               switch (comp->type) {
-               case RSPAMD_HTML_COMPONENT_HREF:
-                       if (comp->len > 0) {
-                               fstr.begin = (gchar *) comp->start;
-                               fstr.len = comp->len;
-                               img->src = rspamd_mempool_ftokdup (pool, &fstr);
-
-                               if (comp->len > sizeof("cid:") - 1 && memcmp(comp->start,
-                                               "cid:", sizeof("cid:") - 1) == 0) {
-                                       /* We have an embedded image */
-                                       img->flags |= RSPAMD_HTML_FLAG_IMAGE_EMBEDDED;
-                               }
-                               else if (comp->len > sizeof("data:") - 1 && memcmp(comp->start,
-                                               "data:", sizeof("data:") - 1) == 0) {
-                                       /* We have an embedded image in HTML tag */
-                                       img->flags |=
-                                                       (RSPAMD_HTML_FLAG_IMAGE_EMBEDDED | RSPAMD_HTML_FLAG_IMAGE_DATA);
-                                       rspamd_html_process_data_image(pool, img, comp);
-                                       hc->flags |= RSPAMD_HTML_FLAG_HAS_DATA_URLS;
-                               }
-                               else {
-                                       img->flags |= RSPAMD_HTML_FLAG_IMAGE_EXTERNAL;
-
-                                       if (img->src) {
-                                               img->url = rspamd_html_process_url(pool,
-                                                               img->src, fstr.len, NULL);
-
-                                               if (img->url) {
-                                                       struct rspamd_url *existing;
-
-                                                       img->url->flags |= RSPAMD_URL_FLAG_IMAGE;
-                                                       existing = rspamd_url_set_add_or_return(url_set, img->url);
-
-                                                       if (existing != img->url) {
-                                                               /*
-                                                                * We have some other URL that could be
-                                                                * found, e.g. from another part. However,
-                                                                * we still want to set an image flag on it
-                                                                */
-                                                               existing->flags |= img->url->flags;
-                                                               existing->count++;
-                                                       }
-                                                       else if (part_urls) {
-                                                               /* New url */
-                                                               g_ptr_array_add(part_urls, img->url);
-                                                       }
-                                               }
-                                       }
-                               }
-                       }
-                       break;
-
-               case RSPAMD_HTML_COMPONENT_HEIGHT:
-                       rspamd_strtoul(comp->start, comp->len, &val);
-                       img->height = val;
-                       seen_height = TRUE;
-                       break;
-
-               case RSPAMD_HTML_COMPONENT_WIDTH:
-                       rspamd_strtoul(comp->start, comp->len, &val);
-                       img->width = val;
-                       seen_width = TRUE;
-                       break;
-
-               case RSPAMD_HTML_COMPONENT_STYLE:
-                       /* Try to search for height= or width= in style tag */
-                       if (!seen_height && comp->len > 0 &&
-                               rspamd_html_style_find_dimension(comp,
-                                               "height", sizeof("height") - 1, &val)) {
-                               img->height = val;
-                       }
-
-                       if (!seen_width && comp->len > 0 &&
-                               rspamd_html_style_find_dimension(comp,
-                                               "width", sizeof("width") - 1, &val)) {
-                               img->width = val;
-                       }
-                       break;
-
-               case RSPAMD_HTML_COMPONENT_ALT:
-                       if (comp->len > 0 && dest != NULL) {
-                               if (dest->len > 0 && !g_ascii_isspace (dest->data[dest->len - 1])) {
-                                       /* Add a space */
-                                       g_byte_array_append(dest, " ", 1);
-                               }
-
-                               g_byte_array_append(dest, comp->start, comp->len);
-
-                               if (!g_ascii_isspace (dest->data[dest->len - 1])) {
-                                       /* Add a space */
-                                       g_byte_array_append(dest, " ", 1);
-                               }
-                       }
-                       break;
-
-               default:
-                       /* Other attributes are not relevant for images */
-                       break;
-               }
-
-               cur = g_list_next (cur);
-       }
-
-       if (hc->images == NULL) {
-               hc->images = g_ptr_array_sized_new(4);
-               rspamd_mempool_notify_alloc (pool, 4 * sizeof(gpointer) + sizeof(GPtrArray));
-               rspamd_mempool_add_destructor (pool, rspamd_ptr_array_free_hard,
-                               hc->images);
-       }
-
-       if (img->embedded_image) {
-               /* Explicit attributes win over the decoded image dimensions */
-               if (!seen_height) {
-                       img->height = img->embedded_image->height;
-               }
-               if (!seen_width) {
-                       img->width = img->embedded_image->width;
-               }
-       }
-
-       g_ptr_array_add(hc->images, img);
-       tag->extra = img;
-}
-
-/*
- * Handles a <link> tag: every `rel` attribute whose value is exactly `icon`
- * (compared case-insensitively) makes the tag be processed as an image, with
- * no destination buffer for the alt text.
- */
-static void
-rspamd_html_process_link_tag(rspamd_mempool_t *pool, struct html_tag *tag,
-                                                        struct html_content *hc, khash_t (rspamd_url_hash) *url_set,
-                                                        GPtrArray *part_urls)
-{
-       GList *node;
-
-       for (node = tag->params->head; node != NULL; node = g_list_next (node)) {
-               struct html_tag_component *attr = node->data;
-
-               if (attr->type != RSPAMD_HTML_COMPONENT_REL) {
-                       continue;
-               }
-
-               /* An exact length match also rules out empty values */
-               if (attr->len != sizeof("icon") - 1) {
-                       continue;
-               }
-
-               if (rspamd_lc_cmp(attr->start, "icon", sizeof("icon") - 1) != 0) {
-                       continue;
-               }
-
-               rspamd_html_process_img_tag(pool, tag, hc, url_set, part_urls, NULL);
-       }
-}
-
-/*
- * Parses a colour specification of `len` bytes starting at `line` into `cl`.
- * `cl` is zeroed first, so on any failure it is left with `valid` unset.
- *
- * Supported forms:
- *  - `#rrggbb` hex notation; the short `#rgb` form is expanded by doubling
- *    each digit. Alpha is set to 255;
- *  - `rgb(r, g, b)` and `rgba(r, g, b, a)` with unsigned decimal integers;
- *  - a colour name looked up in `html_color_by_name` (alpha forced to 255).
- */
-static void
-rspamd_html_process_color(const gchar *line, guint len, struct html_color *cl) {
-       const gchar *p = line, *end = line + len;
-       char hexbuf[7];
-       rspamd_ftok_t search;
-       struct html_color *el;
-
-       memset(cl, 0, sizeof(*cl));
-
-       if (len == 0) {
-               /* Nothing to parse, do not look at *line */
-               return;
-       }
-
-       if (*p == '#') {
-               guint ndigits = 0;
-
-               /* HEX color */
-               p++;
-
-               /* Count leading hex digits (at most what fits into hexbuf) */
-               while (ndigits < sizeof(hexbuf) - 1 && p + ndigits < end &&
-                          g_ascii_isxdigit (p[ndigits])) {
-                       ndigits++;
-               }
-
-               if (ndigits == 3) {
-                       /* Short notation: #rgb stands for #rrggbb */
-                       hexbuf[0] = hexbuf[1] = p[0];
-                       hexbuf[2] = hexbuf[3] = p[1];
-                       hexbuf[4] = hexbuf[5] = p[2];
-                       hexbuf[6] = '\0';
-               }
-               else {
-                       rspamd_strlcpy(hexbuf, p, MIN ((gint) sizeof(hexbuf), end - p + 1));
-               }
-
-               cl->d.val = strtoul(hexbuf, NULL, 16);
-               cl->d.comp.alpha = 255;
-               cl->valid = TRUE;
-       }
-       else if (len > 4 && rspamd_lc_cmp(p, "rgb", 3) == 0) {
-               /* We have something like rgba(x,x,x,x) or rgb(x,x,x) */
-               enum {
-                       obrace,
-                       num1,
-                       num2,
-                       num3,
-                       num4,
-                       skip_spaces
-               } state = skip_spaces, next_state = obrace;
-               gulong r = 0, g = 0, b = 0, opacity = 255;
-               const gchar *c;
-               gboolean valid = FALSE;
-
-               p += 3;
-
-               if (*p == 'a') {
-                       p++;
-               }
-
-               /* `c` always marks the start of the token being read */
-               c = p;
-
-               while (p < end) {
-                       switch (state) {
-                       case obrace:
-                               if (*p == '(') {
-                                       p++;
-                                       state = skip_spaces;
-                                       next_state = num1;
-                               }
-                               else if (g_ascii_isspace (*p)) {
-                                       state = skip_spaces;
-                                       next_state = obrace;
-                               }
-                               else {
-                                       goto stop;
-                               }
-                               break;
-                       case num1:
-                               if (*p == ',') {
-                                       if (!rspamd_strtoul(c, p - c, &r)) {
-                                               goto stop;
-                                       }
-
-                                       p++;
-                                       state = skip_spaces;
-                                       next_state = num2;
-                               }
-                               else if (!g_ascii_isdigit (*p)) {
-                                       goto stop;
-                               }
-                               else {
-                                       p++;
-                               }
-                               break;
-                       case num2:
-                               if (*p == ',') {
-                                       if (!rspamd_strtoul(c, p - c, &g)) {
-                                               goto stop;
-                                       }
-
-                                       p++;
-                                       state = skip_spaces;
-                                       next_state = num3;
-                               }
-                               else if (!g_ascii_isdigit (*p)) {
-                                       goto stop;
-                               }
-                               else {
-                                       p++;
-                               }
-                               break;
-                       case num3:
-                               /* The colour becomes valid once the third number is read */
-                               if (*p == ',') {
-                                       if (!rspamd_strtoul(c, p - c, &b)) {
-                                               goto stop;
-                                       }
-
-                                       valid = TRUE;
-                                       p++;
-                                       state = skip_spaces;
-                                       next_state = num4;
-                               }
-                               else if (*p == ')') {
-                                       if (!rspamd_strtoul(c, p - c, &b)) {
-                                               goto stop;
-                                       }
-
-                                       valid = TRUE;
-                                       goto stop;
-                               }
-                               else if (!g_ascii_isdigit (*p)) {
-                                       goto stop;
-                               }
-                               else {
-                                       p++;
-                               }
-                               break;
-                       case num4:
-                               /*
-                                * NOTE(review): the fourth component is read as a plain
-                                * integer and stored as alpha as is; fractional values
-                                * such as `0.5` stop the parser and keep the default of
-                                * 255 - confirm this matches the expected rgba() inputs.
-                                */
-                               if (*p == ',') {
-                                       if (!rspamd_strtoul(c, p - c, &opacity)) {
-                                               goto stop;
-                                       }
-
-                                       valid = TRUE;
-                                       goto stop;
-                               }
-                               else if (*p == ')') {
-                                       if (!rspamd_strtoul(c, p - c, &opacity)) {
-                                               goto stop;
-                                       }
-
-                                       valid = TRUE;
-                                       goto stop;
-                               }
-                               else if (!g_ascii_isdigit (*p)) {
-                                       goto stop;
-                               }
-                               else {
-                                       p++;
-                               }
-                               break;
-                       case skip_spaces:
-                               if (!g_ascii_isspace (*p)) {
-                                       /* Start of the next token; do not consume it here */
-                                       c = p;
-                                       state = next_state;
-                               }
-                               else {
-                                       p++;
-                               }
-                               break;
-                       }
-               }
-
-stop:
-
-               if (valid) {
-                       cl->d.comp.r = r;
-                       cl->d.comp.g = g;
-                       cl->d.comp.b = b;
-                       cl->d.comp.alpha = opacity;
-                       cl->valid = TRUE;
-               }
-       }
-       else {
-               khiter_t k;
-               /* Compare color by name */
-               search.begin = line;
-               search.len = len;
-
-               k = kh_get (color_by_name, html_color_by_name, &search);
-
-               if (k != kh_end (html_color_by_name)) {
-                       el = &kh_val (html_color_by_name, k);
-                       memcpy(cl, el, sizeof(*cl));
-                       cl->d.comp.alpha = 255; /* Non transparent */
-               }
-       }
-}
-
-/*
- * Truncates a pixel value to a whole number that fits into `guint`.
- * Values below 1 (including negative ones and NaN) become 0 and values that
- * do not fit are clamped, so the conversion to `guint` is always defined.
- */
-static gdouble
-rspamd_html_css_trunc_px(gdouble px)
-{
-       const gdouble max_px = (gdouble) ((guint) -1);
-
-       if (!(px >= 1.0)) {
-               return 0;
-       }
-
-       if (px >= max_px) {
-               return max_px;
-       }
-
-       return (guint) px;
-}
-
-/*
- * Converts a CSS size to whole pixels according to its unit.
- *
- * `suffix`/`len` is the (lowercase) text that follows the number, `*tgt`
- * holds the number itself. Only the beginning of `suffix` is compared with
- * the known units, so trailing characters are ignored.
- *
- * Target is used for in and out if this function returns TRUE; on FALSE
- * (empty or unknown unit) it is left untouched.
- */
-static gboolean
-rspamd_html_process_css_size(const gchar *suffix, gsize len,
-                                                        gdouble *tgt) {
-       /* Pixels per unit are expressed as mul / div, applied in this order */
-       static const struct {
-               const gchar *unit;
-               gsize ulen;
-               gdouble mul;
-               gdouble div;
-       } css_units[] = {
-                       {"px",   2, 1.0,  1.0},  /* Round to number */
-                       {"em",   2, 16.0, 1.0},  /* EM is 16 px */
-                       {"rem",  3, 16.0, 1.0},  /* equal to EM in our case */
-                       /*
-                        * Represents the x-height of the element's font.
-                        * On fonts with the "x" letter, this is generally the height
-                        * of lowercase letters in the font; 1ex = 0.5em in many fonts.
-                        */
-                       {"ex",   2, 8.0,  1.0},
-                       {"vw",   2, 8.0,  1.0},  /* 1% of viewport width, assumed 8px */
-                       {"vh",   2, 6.0,  1.0},  /* 1% of viewport height, assumed 6px */
-                       {"vmax", 4, 8.0,  1.0},  /* treated like vw */
-                       {"vmin", 4, 6.0,  1.0},  /* treated like vh */
-                       {"pt",   2, 96.0, 72.0}, /* One point. 1pt = 1/72nd of 1in */
-                       {"cm",   2, 96.0, 2.54}, /* 96px/2.54 */
-                       {"mm",   2, 9.6,  2.54}, /* 9.6px/2.54 */
-                       {"in",   2, 96.0, 1.0},  /* 96px */
-                       {"pc",   2, 96.0, 6.0},  /* 1pc = 12pt = 1/6th of 1in. */
-       };
-       gdouble sz = *tgt;
-       gsize i;
-
-       if (len == 0) {
-               /* No unit at all, do not look at suffix[0] */
-               return FALSE;
-       }
-
-       if (suffix[0] == '%') {
-               /* Percentages from 16 px */
-               *tgt = rspamd_html_css_trunc_px(sz / 100.0 * 16.0);
-
-               return TRUE;
-       }
-
-       for (i = 0; i < sizeof(css_units) / sizeof(css_units[0]); i++) {
-               if (len >= css_units[i].ulen &&
-                       memcmp(suffix, css_units[i].unit, css_units[i].ulen) == 0) {
-                       *tgt = rspamd_html_css_trunc_px(
-                                       sz * css_units[i].mul / css_units[i].div);
-
-                       return TRUE;
-               }
-       }
-
-       return FALSE;
-}
-
-/*
- * Parses a font size of `len` bytes starting at `line` and stores the result
- * in `*fs`, capped at 32.
- *
- * `is_css` selects how values without a usable unit are treated:
- *  - CSS mode: sizes below 1 become 0, everything else becomes 16;
- *  - legacy mode: a number >= 1 is multiplied by 16, otherwise 16 is used.
- * A number followed by a unit known to rspamd_html_process_css_size is
- * converted by that function instead.
- */
-static void
-rspamd_html_process_font_size(const gchar *line, guint len, guint *fs,
-                                                         gboolean is_css) {
-       const gchar *p = line, *end = line + len;
-       gchar *err = NULL, numbuf[64];
-       gdouble sz = 0;
-       gboolean failsafe = FALSE;
-
-       while (p < end && g_ascii_isspace (*p)) {
-               p++;
-               len--;
-       }
-
-       /* `p < end` guards against reading past an empty or all-space value */
-       if (p < end && g_ascii_isdigit (*p)) {
-               /* Work on a NUL terminated copy, as required by strtod */
-               rspamd_strlcpy(numbuf, p, MIN (sizeof(numbuf), len + 1));
-               sz = strtod(numbuf, &err);
-
-               /* Now check leftover */
-               if (sz < 0) {
-                       sz = 0;
-               }
-       }
-       else {
-               /* Ignore the rest */
-               failsafe = TRUE;
-               sz = is_css ? 16 : 1;
-               /* TODO: add textual fonts descriptions */
-       }
-
-       if (err && *err != '\0') {
-               /* Something follows the number: treat it as a unit */
-               const gchar *e = err;
-               gsize slen;
-
-               /* Skip spaces */
-               while (*e && g_ascii_isspace (*e)) {
-                       e++;
-               }
-
-               /* Lowercase (in place: `e` points into the local `numbuf`) */
-               slen = strlen(e);
-               rspamd_str_lc((gchar *) e, slen);
-
-               if (!rspamd_html_process_css_size(e, slen, &sz)) {
-                       failsafe = TRUE;
-               }
-       }
-       else {
-               /* Failsafe naked number */
-               failsafe = TRUE;
-       }
-
-       if (failsafe) {
-               if (is_css) {
-                       /*
-                        * In css mode we usually ignore sizes, but let's treat
-                        * small sizes specially
-                        */
-                       if (sz < 1) {
-                               sz = 0;
-                       }
-                       else {
-                               sz = 16; /* Ignore */
-                       }
-               }
-               else {
-                       /* In non-css mode we have to check legacy size */
-                       sz = sz >= 1 ? sz * 16 : 16;
-               }
-       }
-
-       if (sz > 32) {
-               sz = 32;
-       }
-
-       *fs = sz;
-}
-
-/*
- * Case-insensitive check of a style property name against a known name.
- */
-static gboolean
-rspamd_html_style_key_is(const gchar *key, guint klen,
-                                                const gchar *name, guint namelen)
-{
-       if (klen != namelen) {
-               return FALSE;
-       }
-
-       return g_ascii_strncasecmp(key, name, namelen) == 0;
-}
-
-/*
- * Parses an inline style (`key: value; key: value ...`) of `len` bytes and
- * applies the properties it understands to `bl`:
- *  - color / font-color           -> bl->font_color
- *  - background-color / background -> bl->background_color
- *  - display containing `none`     -> bl->visible = FALSE
- *  - visibility containing `hidden` -> bl->visible = FALSE
- *  - font-size                     -> bl->font_size
- *  - opacity (clamped to [0, 1])   -> alpha of bl->font_color
- * Unknown properties and empty values are skipped.
- */
-static void
-rspamd_html_process_style(rspamd_mempool_t *pool, struct html_block *bl,
-                                                 struct html_content *hc, const gchar *style, guint len) {
-       enum {
-               parse_key,
-               parse_colon,
-               parse_value,
-               parse_spaces,
-       } state = parse_spaces, next_state = parse_key;
-       /* `tok` marks the start of the token being read, `fin` is one past the end */
-       const gchar *cur = style, *tok = style, *fin = style + len;
-       const gchar *key = NULL;
-       guint klen = 0;
-
-       /* `cur == fin` is visited on purpose to flush the last token */
-       while (cur <= fin) {
-               switch (state) {
-               case parse_key:
-                       if (cur == fin || *cur == ':' || g_ascii_isspace (*cur)) {
-                               key = tok;
-                               klen = cur - tok;
-                               /* A space ends the key but the colon is still expected */
-                               next_state = (cur < fin && *cur != ':') ?
-                                                        parse_colon : parse_value;
-                               state = parse_spaces;
-                       }
-
-                       cur++;
-                       break;
-
-               case parse_colon:
-                       if (cur == fin || *cur == ':') {
-                               next_state = parse_value;
-                               state = parse_spaces;
-                       }
-
-                       cur++;
-                       break;
-
-               case parse_value:
-                       if (cur == fin || *cur == ';') {
-                               const gsize vlen = cur - tok;
-
-                               if (key != NULL && klen != 0 && vlen > 0) {
-                                       if (rspamd_html_style_key_is(key, klen, "color", 5) ||
-                                               rspamd_html_style_key_is(key, klen, "font-color", 10)) {
-                                               rspamd_html_process_color(tok, vlen, &bl->font_color);
-                                               msg_debug_html ("got color: %xd", bl->font_color.d.val);
-                                       }
-                                       else if (rspamd_html_style_key_is(key, klen, "background-color", 16) ||
-                                                        rspamd_html_style_key_is(key, klen, "background", 10)) {
-                                               rspamd_html_process_color(tok, vlen, &bl->background_color);
-                                               msg_debug_html ("got bgcolor: %xd", bl->background_color.d.val);
-                                       }
-                                       else if (rspamd_html_style_key_is(key, klen, "display", 7)) {
-                                               if (vlen >= 4 &&
-                                                       rspamd_substring_search_caseless(tok, vlen, "none", 4) != -1) {
-                                                       bl->visible = FALSE;
-                                                       msg_debug_html ("tag is not visible");
-                                               }
-                                       }
-                                       else if (rspamd_html_style_key_is(key, klen, "font-size", 9)) {
-                                               rspamd_html_process_font_size(tok, vlen, &bl->font_size, TRUE);
-                                               msg_debug_html ("got font size: %ud", bl->font_size);
-                                       }
-                                       else if (rspamd_html_style_key_is(key, klen, "opacity", 7)) {
-                                               gchar numbuf[64];
-                                               gdouble opacity;
-
-                                               /* strtod needs a NUL terminated copy of the value */
-                                               rspamd_strlcpy(numbuf, tok, MIN (sizeof(numbuf), vlen + 1));
-                                               opacity = strtod(numbuf, NULL);
-                                               opacity = opacity > 1 ? 1 : (opacity < 0 ? 0 : opacity);
-
-                                               bl->font_color.d.comp.alpha = (guint8) (opacity * 255.0);
-                                       }
-                                       else if (rspamd_html_style_key_is(key, klen, "visibility", 10)) {
-                                               if (vlen >= 6 &&
-                                                       rspamd_substring_search_caseless(tok, vlen, "hidden", 6) != -1) {
-                                                       bl->visible = FALSE;
-                                                       msg_debug_html ("tag is not visible");
-                                               }
-                                       }
-                               }
-
-                               /* Get ready for the next `key: value` pair */
-                               key = NULL;
-                               klen = 0;
-                               next_state = parse_key;
-                               state = parse_spaces;
-                       }
-
-                       cur++;
-                       break;
-
-               case parse_spaces:
-                       if (cur >= fin || g_ascii_isspace (*cur)) {
-                               cur++;
-                       }
-                       else {
-                               /* First byte of the next token; it is not consumed here */
-                               tok = cur;
-                               state = next_state;
-                       }
-
-                       break;
-               }
-       }
-}
-
-/**
- * Builds the `html_block` that describes the presentation attributes of a tag
- * and attaches it to both the tag and the document.
- *
- * The block is allocated from `pool` and starts out visible, with the font
- * size set to the `(guint) -1` "not set" marker and the font colour alpha
- * set to 255. Every non-empty component of the tag is then inspected:
- * colour, background colour, inline style, class and size are handled,
- * all other component types are ignored.
- *
- * @param pool memory pool used for the block and for the class string copy
- * @param tag tag whose components are examined; `tag->extra` is set to the
- *            resulting block
- * @param hc document state; the block is appended to `hc->blocks` (created
- *           on first use) and `hc->bgcolor` is updated for a BODY bgcolor
- */
-static void
-rspamd_html_process_block_tag(rspamd_mempool_t *pool, struct html_tag *tag,
-                                                         struct html_content *hc) {
-       struct html_tag_component *comp;
-       struct html_block *bl;
-       rspamd_ftok_t fstr;
-       GList *cur;
-
-       cur = tag->params->head;
-       bl = rspamd_mempool_alloc0 (pool, sizeof(*bl));
-       bl->tag = tag;
-       bl->visible = TRUE;
-       bl->font_size = (guint) -1;
-       bl->font_color.d.comp.alpha = 255;
-
-       while (cur) {
-               comp = cur->data;
-
-               if (comp->len > 0) {
-                       switch (comp->type) {
-                       case RSPAMD_HTML_COMPONENT_COLOR:
-                               rspamd_html_process_color(comp->start, comp->len,
-                                               &bl->font_color);
-                               msg_debug_html ("tag %*s; got color: %xd",
-                                               tag->name.len, tag->name.start, bl->font_color.d.val);
-                               break;
-                       case RSPAMD_HTML_COMPONENT_BGCOLOR:
-                               rspamd_html_process_color(comp->start, comp->len,
-                                               &bl->background_color);
-                               /* Report the colour that was just parsed, not the font colour */
-                               msg_debug_html ("tag %*s; got color: %xd",
-                                               tag->name.len, tag->name.start,
-                                               bl->background_color.d.val);
-
-                               if (tag->id == Tag_BODY) {
-                                       /* Set global background color */
-                                       memcpy(&hc->bgcolor, &bl->background_color,
-                                                       sizeof(hc->bgcolor));
-                               }
-                               break;
-                       case RSPAMD_HTML_COMPONENT_STYLE:
-                               bl->style.len = comp->len;
-                               bl->style.start = comp->start;
-                               msg_debug_html ("tag: %*s; got style: %*s",
-                                               tag->name.len, tag->name.start,
-                                               (gint) bl->style.len, bl->style.start);
-                               rspamd_html_process_style(pool, bl, hc, comp->start, comp->len);
-                               break;
-                       case RSPAMD_HTML_COMPONENT_CLASS:
-                               /* The class is duplicated into the pool from a token view */
-                               fstr.begin = (gchar *) comp->start;
-                               fstr.len = comp->len;
-                               bl->html_class = rspamd_mempool_ftokdup (pool, &fstr);
-                               msg_debug_html ("tag: %*s; got class: %s",
-                                               tag->name.len, tag->name.start, bl->html_class);
-                               break;
-                       case RSPAMD_HTML_COMPONENT_SIZE:
-                               /* Not supported by html5 */
-                               /* FIXME maybe support it: the value is logged but not parsed */
-                               bl->font_size = 16;
-                               msg_debug_html ("tag %*s; got size: %*s",
-                                               tag->name.len, tag->name.start,
-                                               (gint) comp->len, comp->start);
-                               break;
-                       default:
-                               /* NYI */
-                               break;
-                       }
-               }
-
-               cur = g_list_next (cur);
-       }
-
-       if (hc->blocks == NULL) {
-               /* Lazily create the blocks array and tie its lifetime to the pool */
-               hc->blocks = g_ptr_array_sized_new(64);
-               rspamd_mempool_notify_alloc (pool, 64 * sizeof(gpointer) + sizeof(GPtrArray));
-               rspamd_mempool_add_destructor (pool, rspamd_ptr_array_free_hard,
-                               hc->blocks);
-       }
-
-       g_ptr_array_add(hc->blocks, bl);
-       tag->extra = bl;
-}
-
-/**
- * Extracts the text shown for a link and checks it against the link target.
- *
- * The bytes of `dest` from `href_offset` to the end are copied into
- * `url->visible_part`, trimmed of unicode spaces and passed to
- * `rspamd_html_url_is_phished`. If that call reports a URL in the text,
- * `url` is flagged with RSPAMD_URL_FLAG_DISPLAY_URL and, when `exceptions`
- * is given, an exception covering the untrimmed text range is prepended.
- * A displayed URL returned by the check is added to `url_set`.
- *
- * @param pool memory pool used for the copied text and the exception
- * @param exceptions optional list that receives a URL exception
- * @param url_set optional set that receives the displayed URL
- * @param dest decoded text accumulated so far
- * @param href_offset offset in `dest` where the link text starts; a negative
- *                    value (or one beyond the end of `dest`) means there is
- *                    nothing to check
- * @param url the URL taken from the enclosing link
- */
-static void
-rspamd_html_check_displayed_url(rspamd_mempool_t *pool,
-                                                               GList **exceptions,
-                                                               khash_t (rspamd_url_hash) *url_set,
-                                                               GByteArray *dest,
-                                                               gint href_offset,
-                                                               struct rspamd_url *url) {
-       struct rspamd_url *displayed_url = NULL;
-       struct rspamd_url *turl;
-       gboolean url_found = FALSE;
-       struct rspamd_process_exception *ex;
-       gsize dlen;
-
-       if (href_offset < 0 || (guint) href_offset > dest->len) {
-               /*
-                * No displayed url, just some text within <a> tag; an offset
-                * past the end would also wrap the unsigned length below
-                */
-               return;
-       }
-
-       dlen = dest->len - href_offset;
-       url->visible_part = rspamd_mempool_alloc (pool, dlen + 1);
-       rspamd_strlcpy(url->visible_part, dest->data + href_offset, dlen + 1);
-
-       /* Strip unicode spaces from the start and the end */
-       url->visible_part = rspamd_string_unicode_trim_inplace(url->visible_part,
-                       &dlen);
-       rspamd_html_url_is_phished(pool, url,
-                       url->visible_part,
-                       dlen,
-                       &url_found, &displayed_url);
-
-       if (url_found) {
-               url->flags |= RSPAMD_URL_FLAG_DISPLAY_URL;
-
-               if (exceptions) {
-                       ex = rspamd_mempool_alloc (pool, sizeof(*ex));
-                       ex->pos = href_offset;
-                       /* Untrimmed length: dlen was shortened by the trim above */
-                       ex->len = dest->len - href_offset;
-                       ex->type = RSPAMD_EXCEPTION_URL;
-                       ex->ptr = url;
-
-                       *exceptions = g_list_prepend(*exceptions, ex);
-               }
-       }
-
-       if (displayed_url && url_set) {
-               turl = rspamd_url_set_add_or_return(url_set,
-                               displayed_url);
-
-               if (turl != NULL) {
-                       /* Here, we assume the following:
-                        * if we have a URL in the text part which
-                        * is the same as displayed URL in the
-                        * HTML part, we assume that it is also
-                        * hint only.
-                        */
-                       if (turl->flags &
-                               RSPAMD_URL_FLAG_FROM_TEXT) {
-                               turl->flags |= RSPAMD_URL_FLAG_HTML_DISPLAYED;
-                               turl->flags &= ~RSPAMD_URL_FLAG_FROM_TEXT;
-                       }
-
-                       turl->count++;
-               }
-               else {
-                       /* Already inserted by `rspamd_url_set_add_or_return` */
-               }
-       }
-
-       rspamd_normalise_unicode_inplace(url->visible_part, &dlen);
-}
-
-/**
- * Node callback that adds the content length of every direct child tag
- * to the tag stored in `node`. Nodes that carry no tag are left alone.
- *
- * @param node tree node whose `data` is a `struct html_tag` (or NULL)
- * @param _unused ignored
- * @return always FALSE
- */
-static gboolean
-rspamd_html_propagate_lengths(GNode *node, gpointer _unused) {
-       struct html_tag *parent_tag = node->data, *child_tag;
-       GNode *it;
-
-       if (parent_tag == NULL) {
-               return FALSE;
-       }
-
-       /* Accumulate the lengths of the immediate children only */
-       for (it = node->children; it != NULL; it = it->next) {
-               child_tag = it->data;
-               parent_tag->content_length += child_tag->content_length;
-       }
-
-       return FALSE;
-}
-
-/**
- * Completes the style of `bl` with inherited and default values.
- *
- * First pass (only when `blocks` has a tail element): background colour,
- * font colour and font size that are not set on `bl` are copied from that
- * parent block when the parent has them.
- * Second pass: anything still missing falls back to black font colour,
- * the document background `hc->bgcolor` and a font size of 16.
- *
- * Whenever a field is found already set in either pass the block is marked
- * to be kept; since the second pass runs after inheritance, an inherited
- * value counts as set there. A kept block is pushed to the tail of
- * `blocks` unless the tag carries FL_CLOSED.
- *
- * @param hc document state providing the default background colour
- * @param tag tag owning the block, consulted for FL_CLOSED
- * @param bl block to complete
- * @param blocks queue of enclosing blocks, innermost at the tail
- */
-static void
-rspamd_html_propagate_style(struct html_content *hc,
-                                                       struct html_tag *tag,
-                                                       struct html_block *bl,
-                                                       GQueue *blocks) {
-       struct html_block *parent = g_queue_peek_tail(blocks);
-       gboolean keep_block = FALSE;
-
-       if (parent != NULL) {
-               /* Background colour: own value wins, otherwise take the parent's */
-               if (bl->background_color.valid) {
-                       keep_block = TRUE;
-               }
-               else if (parent->background_color.valid) {
-                       memcpy(&bl->background_color, &parent->background_color,
-                                       sizeof(bl->background_color));
-               }
-
-               /* Font colour: own value wins, otherwise take the parent's */
-               if (bl->font_color.valid) {
-                       keep_block = TRUE;
-               }
-               else if (parent->font_color.valid) {
-                       memcpy(&bl->font_color, &parent->font_color,
-                                       sizeof(bl->font_color));
-               }
-
-               /* Font size: (guint) -1 marks "not set" */
-               if (bl->font_size != (guint) -1) {
-                       keep_block = TRUE;
-               }
-               else if (parent->font_size != (guint) -1) {
-                       bl->font_size = parent->font_size;
-               }
-       }
-
-       /* Last resort defaults for whatever is still missing */
-       if (bl->font_color.valid) {
-               keep_block = TRUE;
-       }
-       else {
-               /* Black text; alpha is left alone as it can be set separately */
-               bl->font_color.d.comp.r = 0;
-               bl->font_color.d.comp.g = 0;
-               bl->font_color.d.comp.b = 0;
-               bl->font_color.valid = TRUE;
-       }
-
-       if (bl->background_color.valid) {
-               keep_block = TRUE;
-       }
-       else {
-               memcpy(&bl->background_color, &hc->bgcolor, sizeof(hc->bgcolor));
-       }
-
-       if (bl->font_size != (guint) -1) {
-               keep_block = TRUE;
-       }
-       else {
-               bl->font_size = 16; /* Default for browsers */
-       }
-
-       if (keep_block && (tag->flags & FL_CLOSED) == 0) {
-               g_queue_push_tail(blocks, bl);
-       }
-}
-
-}
-
-GByteArray*
-rspamd_html_process_part_full (rspamd_mempool_t *pool,
-                                                          struct html_content *hc,
-                                                          GByteArray *in,
-                                                          GList **exceptions,
-                                                          khash_t (rspamd_url_hash) *url_set,
-                                                          GPtrArray *part_urls,
-                                                          bool allow_css)
-{
-       const guchar *p, *c, *end, *savep = NULL;
-       guchar t;
-       gboolean closing = FALSE, need_decode = FALSE, save_space = FALSE,
-                       balanced;
-       GByteArray *dest;
-       guint obrace = 0, ebrace = 0;
-       GNode *cur_level = NULL;
-       gint substate = 0, len, href_offset = -1;
-       struct html_tag *cur_tag = NULL, *content_tag = NULL;
-       struct rspamd_url *url = NULL;
-       GQueue *styles_blocks;
-
-       enum {
-               parse_start = 0,
-               tag_begin,
-               sgml_tag,
-               xml_tag,
-               compound_tag,
-               comment_tag,
-               comment_content,
-               sgml_content,
-               tag_content,
-               tag_end,
-               xml_tag_end,
-               content_ignore,
-               content_write,
-               content_style,
-               content_ignore_sp
-       } state = parse_start;
-
-       g_assert (in != NULL);
-       g_assert (hc != NULL);
-       g_assert (pool != NULL);
-
-       rspamd_html_library_init ();
-       hc->tags_seen = rspamd_mempool_alloc0 (pool, NBYTES (N_TAGS));
-
-       /* Set white background color by default */
-       hc->bgcolor.d.comp.alpha = 0;
-       hc->bgcolor.d.comp.r = 255;
-       hc->bgcolor.d.comp.g = 255;
-       hc->bgcolor.d.comp.b = 255;
-       hc->bgcolor.valid = TRUE;
-
-       dest = g_byte_array_sized_new (in->len / 3 * 2);
-       styles_blocks = g_queue_new ();
-
-       p = in->data;
-       c = p;
-       end = p + in->len;
-
-       while (p < end) {
-               t = *p;
-
-               switch (state) {
-               case parse_start:
-                       if (t == '<') {
-                               state = tag_begin;
-                       }
-                       else {
-                               /* We have no starting tag, so assume that it's content */
-                               hc->flags |= RSPAMD_HTML_FLAG_BAD_START;
-                               state = content_write;
-                       }
-
-                       break;
-               case tag_begin:
-                       switch (t) {
-                       case '<':
-                               p ++;
-                               closing = FALSE;
-                               break;
-                       case '!':
-                               state = sgml_tag;
-                               p ++;
-                               break;
-                       case '?':
-                               state = xml_tag;
-                               hc->flags |= RSPAMD_HTML_FLAG_XML;
-                               p ++;
-                               break;
-                       case '/':
-                               closing = TRUE;
-                               p ++;
-                               break;
-                       case '>':
-                               /* Empty tag */
-                               hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS;
-                               state = tag_end;
-                               continue;
-                       default:
-                               state = tag_content;
-                               substate = 0;
-                               savep = NULL;
-                               cur_tag = rspamd_mempool_alloc0 (pool, sizeof (*cur_tag));
-                               cur_tag->params = g_queue_new ();
-                               rspamd_mempool_add_destructor (pool,
-                                               (rspamd_mempool_destruct_t)g_queue_free, cur_tag->params);
-                               break;
-                       }
-
-                       break;
-
-               case sgml_tag:
-                       switch (t) {
-                       case '[':
-                               state = compound_tag;
-                               obrace = 1;
-                               ebrace = 0;
-                               p ++;
-                               break;
-                       case '-':
-                               state = comment_tag;
-                               p ++;
-                               break;
-                       default:
-                               state = sgml_content;
-                               break;
-                       }
-
-                       break;
-
-               case xml_tag:
-                       if (t == '?') {
-                               state = xml_tag_end;
-                       }
-                       else if (t == '>') {
-                               /* Misformed xml tag */
-                               hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS;
-                               state = tag_end;
-                               continue;
-                       }
-                       /* We efficiently ignore xml tags */
-                       p ++;
-                       break;
-
-               case xml_tag_end:
-                       if (t == '>') {
-                               state = tag_end;
-                               continue;
-                       }
-                       else {
-                               hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS;
-                               p ++;
-                       }
-                       break;
-
-               case compound_tag:
-                       if (t == '[') {
-                               obrace ++;
-                       }
-                       else if (t == ']') {
-                               ebrace ++;
-                       }
-                       else if (t == '>' && obrace == ebrace) {
-                               state = tag_end;
-                               continue;
-                       }
-                       p ++;
-                       break;
-
-               case comment_tag:
-                       if (t != '-')  {
-                               hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS;
-                               state = tag_end;
-                       }
-                       else {
-                               p++;
-                               ebrace = 0;
-                               /*
-                                * https://www.w3.org/TR/2012/WD-html5-20120329/syntax.html#syntax-comments
-                                *  ... the text must not start with a single
-                                *  U+003E GREATER-THAN SIGN character (>),
-                                *  nor start with a "-" (U+002D) character followed by
-                                *  a U+003E GREATER-THAN SIGN (>) character,
-                                *  nor contain two consecutive U+002D HYPHEN-MINUS
-                                *  characters (--), nor end with a "-" (U+002D) character.
-                                */
-                               if (p[0] == '-' && p + 1 < end && p[1] == '>') {
-                                       hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS;
-                                       p ++;
-                                       state = tag_end;
-                               }
-                               else if (*p == '>') {
-                                       hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS;
-                                       state = tag_end;
-                               }
-                               else {
-                                       state = comment_content;
-                               }
-                       }
-                       break;
-
-               case comment_content:
-                       if (t == '-') {
-                               ebrace ++;
-                       }
-                       else if (t == '>' && ebrace >= 2) {
-                               state = tag_end;
-                               continue;
-                       }
-                       else {
-                               ebrace = 0;
-                       }
-
-                       p ++;
-                       break;
-
-               case content_ignore:
-                       if (t != '<') {
-                               p ++;
-                       }
-                       else {
-                               state = tag_begin;
-                       }
-                       break;
-
-               case content_write:
-
-                       if (t != '<') {
-                               if (t == '&') {
-                                       need_decode = TRUE;
-                               }
-                               else if (g_ascii_isspace (t)) {
-                                       save_space = TRUE;
-
-                                       if (p > c) {
-                                               if (need_decode) {
-                                                       goffset old_offset = dest->len;
-
-                                                       if (content_tag) {
-                                                               if (content_tag->content_length == 0) {
-                                                                       content_tag->content_offset = old_offset;
-                                                               }
-                                                       }
-
-                                                       g_byte_array_append (dest, c, (p - c));
-
-                                                       len = rspamd_html_decode_entitles_inplace (
-                                                                       dest->data + old_offset,
-                                                                       p - c);
-                                                       dest->len = dest->len + len - (p - c);
-
-                                                       if (content_tag) {
-                                                               content_tag->content_length += len;
-                                                       }
-                                               }
-                                               else {
-                                                       len = p - c;
-
-                                                       if (content_tag) {
-                                                               if (content_tag->content_length == 0) {
-                                                                       content_tag->content_offset = dest->len;
-                                                               }
-
-                                                               content_tag->content_length += len;
-                                                       }
-
-                                                       g_byte_array_append (dest, c, len);
-                                               }
-                                       }
-
-                                       c = p;
-                                       state = content_ignore_sp;
-                               }
-                               else {
-                                       if (save_space) {
-                                               /* Append one space if needed */
-                                               if (dest->len > 0 &&
-                                                               !g_ascii_isspace (dest->data[dest->len - 1])) {
-                                                       g_byte_array_append (dest, " ", 1);
-                                                       if (content_tag) {
-                                                               if (content_tag->content_length == 0) {
-                                                                       /*
-                                                                        * Special case
-                                                                        * we have a space at the beginning but
-                                                                        * we have no set content_offset
-                                                                        * so we need to do it here
-                                                                        */
-                                                                       content_tag->content_offset = dest->len;
-                                                               }
-                                                               else {
-                                                                       content_tag->content_length++;
-                                                               }
-                                                       }
-                                               }
-                                               save_space = FALSE;
-                                       }
-                               }
-                       }
-                       else {
-                               if (c != p) {
-
-                                       if (need_decode) {
-                                               goffset old_offset = dest->len;
-
-                                               if (content_tag) {
-                                                       if (content_tag->content_length == 0) {
-                                                               content_tag->content_offset = dest->len;
-                                                       }
-                                               }
-
-                                               g_byte_array_append (dest, c, (p - c));
-                                               len = rspamd_html_decode_entitles_inplace (
-                                                               dest->data + old_offset,
-                                                               p - c);
-                                               dest->len = dest->len + len - (p - c);
-
-                                               if (content_tag) {
-                                                       content_tag->content_length += len;
-                                               }
-                                       }
-                                       else {
-                                               len = p - c;
-
-                                               if (content_tag) {
-                                                       if (content_tag->content_length == 0) {
-                                                               content_tag->content_offset = dest->len;
-                                                       }
-
-                                                       content_tag->content_length += len;
-                                               }
-
-                                               g_byte_array_append (dest, c, len);
-                                       }
-                               }
-
-                               content_tag = NULL;
-
-                               state = tag_begin;
-                               continue;
-                       }
-
-                       p ++;
-                       break;
-
-               case content_style: {
-
-                       /*
-                        * We just search for the first </s substring and then pass
-                        * the content to the parser (if needed)
-                        */
-                       goffset end_style = rspamd_substring_search (p, end - p,
-                                       "</", 2);
-                       if (end_style == -1 || g_ascii_tolower (p[end_style + 2]) != 's') {
-                               /* Invalid style */
-                               state = content_ignore;
-                       }
-                       else {
-
-                               if (allow_css) {
-                                       GError *err = NULL;
-                                       hc->css_style = rspamd_css_parse_style (pool, p, end_style, hc->css_style,
-                                                       &err);
-
-                                       if (err) {
-                                               msg_info_pool ("cannot parse css: %e", err);
-                                               g_error_free (err);
-                                       }
-                               }
-
-                               p += end_style;
-                               state = tag_begin;
-                       }
-                       break;
-               }
-
-               case content_ignore_sp:
-                       if (!g_ascii_isspace (t)) {
-                               c = p;
-                               state = content_write;
-                               continue;
-                       }
-
-                       p ++;
-                       break;
-
-               case sgml_content:
-                       /* TODO: parse DOCTYPE here */
-                       if (t == '>') {
-                               state = tag_end;
-                               /* We don't know a lot about sgml tags, ignore them */
-                               cur_tag = NULL;
-                               continue;
-                       }
-                       p ++;
-                       break;
-
-               case tag_content:
-                       rspamd_html_parse_tag_content (pool, hc, cur_tag,
-                                       p, &substate, &savep);
-                       if (t == '>') {
-                               if (closing) {
-                                       cur_tag->flags |= FL_CLOSING;
-
-                                       if (cur_tag->flags & FL_CLOSED) {
-                                               /* Bad mix of closed and closing */
-                                               hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS;
-                                       }
-
-                                       closing = FALSE;
-                               }
-
-                               state = tag_end;
-                               continue;
-                       }
-                       p ++;
-                       break;
-
-               case tag_end:
-                       substate = 0;
-                       savep = NULL;
-
-                       if (cur_tag != NULL) {
-                               balanced = TRUE;
-
-                               if (rspamd_html_process_tag (pool, hc, cur_tag, &cur_level,
-                                               &balanced)) {
-                                       state = content_write;
-                                       need_decode = FALSE;
-                               }
-                               else {
-                                       if (cur_tag->id == Tag_STYLE) {
-                                               state = content_style;
-                                       }
-                                       else {
-                                               state = content_ignore;
-                                       }
-                               }
-
-                               if (cur_tag->id != -1 && cur_tag->id < N_TAGS) {
-                                       if (cur_tag->flags & CM_UNIQUE) {
-                                               if (isset (hc->tags_seen, cur_tag->id)) {
-                                                       /* Duplicate tag has been found */
-                                                       hc->flags |= RSPAMD_HTML_FLAG_DUPLICATE_ELEMENTS;
-                                               }
-                                       }
-                                       setbit (hc->tags_seen, cur_tag->id);
-                               }
-
-                               if (!(cur_tag->flags & (FL_CLOSED|FL_CLOSING))) {
-                                       content_tag = cur_tag;
-                               }
-
-                               /* Handle newlines */
-                               if (cur_tag->id == Tag_BR || cur_tag->id == Tag_HR) {
-                                       if (dest->len > 0 && dest->data[dest->len - 1] != '\n') {
-                                               g_byte_array_append (dest, "\r\n", 2);
-
-                                               if (content_tag) {
-                                                       if (content_tag->content_length == 0) {
-                                                               /*
-                                                                * Special case
-                                                                * we have a \r\n at the beginning but
-                                                                * we have no set content_offset
-                                                                * so we need to do it here
-                                                                */
-                                                               content_tag->content_offset = dest->len;
-                                                       }
-                                                       else {
-                                                               content_tag->content_length += 2;
-                                                       }
-                                               }
-                                       }
-                                       save_space = FALSE;
-                               }
-
-                               if ((cur_tag->id == Tag_P ||
-                                               cur_tag->id == Tag_TR ||
-                                               cur_tag->id == Tag_DIV)) {
-                                       if (dest->len > 0 && dest->data[dest->len - 1] != '\n') {
-                                               g_byte_array_append (dest, "\r\n", 2);
-
-                                               if (content_tag) {
-                                                       if (content_tag->content_length == 0) {
-                                                               /*
-                                                                * Special case
-                                                                * we have a \r\n at the beginning but
-                                                                * we have no set content_offset
-                                                                * so we need to get it here
-                                                                */
-                                                               content_tag->content_offset = dest->len;
-                                                       }
-                                                       else {
-                                                               content_tag->content_length += 2;
-                                                       }
-                                               }
-                                       }
-                                       save_space = FALSE;
-                               }
-
-                               /* XXX: uncomment when styles parsing is not so broken */
-                               if (cur_tag->flags & FL_HREF /* && !(cur_tag->flags & FL_IGNORE) */) {
-                                       if (!(cur_tag->flags & (FL_CLOSING))) {
-                                               url = rspamd_html_process_url_tag (pool, cur_tag, hc);
-
-                                               if (url != NULL) {
-
-                                                       if (url_set != NULL) {
-                                                               struct rspamd_url *maybe_existing =
-                                                                               rspamd_url_set_add_or_return (url_set, url);
-                                                               if (maybe_existing == url) {
-                                                                       rspamd_process_html_url (pool, url, url_set,
-                                                                                       part_urls);
-                                                               }
-                                                               else {
-                                                                       url = maybe_existing;
-                                                                       /* Increase count to avoid odd checks failure */
-                                                                       url->count ++;
-                                                               }
-                                                       }
-
-                                                       href_offset = dest->len;
-                                               }
-                                       }
-
-                                       if (cur_tag->id == Tag_A) {
-                                               if (!balanced && cur_level && cur_level->prev) {
-                                                       struct html_tag *prev_tag;
-                                                       struct rspamd_url *prev_url;
-
-                                                       prev_tag = cur_level->prev->data;
-
-                                                       if (prev_tag->id == Tag_A &&
-                                                                       !(prev_tag->flags & (FL_CLOSING)) &&
-                                                                       prev_tag->extra) {
-                                                               prev_url = prev_tag->extra;
-
-                                                               rspamd_html_check_displayed_url (pool,
-                                                                               exceptions, url_set,
-                                                                               dest, href_offset,
-                                                                               prev_url);
-                                                       }
-                                               }
-
-                                               if (cur_tag->flags & (FL_CLOSING)) {
-
-                                                       /* Insert exception */
-                                                       if (url != NULL && (gint) dest->len > href_offset) {
-                                                               rspamd_html_check_displayed_url (pool,
-                                                                               exceptions, url_set,
-                                                                               dest, href_offset,
-                                                                               url);
-
-                                                       }
-
-                                                       href_offset = -1;
-                                                       url = NULL;
-                                               }
-                                       }
-                               }
-                               else if (cur_tag->id == Tag_BASE && !(cur_tag->flags & (FL_CLOSING))) {
-                                       /*
-                                        * Base is allowed only within head tag but HTML is retarded
-                                        */
-                                       if (hc->base_url == NULL) {
-                                               url = rspamd_html_process_url_tag (pool, cur_tag, hc);
-
-                                               if (url != NULL) {
-                                                       msg_debug_html ("got valid base tag");
-                                                       hc->base_url = url;
-                                                       cur_tag->extra = url;
-                                                       cur_tag->flags |= FL_HREF;
-                                               }
-                                               else {
-                                                       msg_debug_html ("got invalid base tag!");
-                                               }
-                                       }
-                               }
-
-                               if (cur_tag->id == Tag_IMG && !(cur_tag->flags & FL_CLOSING)) {
-                                       rspamd_html_process_img_tag (pool, cur_tag, hc, url_set,
-                                                       part_urls, dest);
-                               }
-                               else if (cur_tag->id == Tag_LINK && !(cur_tag->flags & FL_CLOSING)) {
-                                       rspamd_html_process_link_tag (pool, cur_tag, hc, url_set,
-                                                       part_urls);
-                               }
-                               else if (cur_tag->flags & FL_BLOCK) {
-                                       struct html_block *bl;
-
-                                       if (cur_tag->flags & FL_CLOSING) {
-                                               /* Just remove block element from the queue if any */
-                                               if (styles_blocks->length > 0) {
-                                                       g_queue_pop_tail (styles_blocks);
-                                               }
-                                       }
-                                       else {
-                                               rspamd_html_process_block_tag (pool, cur_tag, hc);
-                                               bl = cur_tag->extra;
-
-                                               if (bl) {
-                                                       rspamd_html_propagate_style (hc, cur_tag,
-                                                                       cur_tag->extra, styles_blocks);
-
-                                                       /* Check visibility */
-                                                       if (bl->font_size < 3 ||
-                                                               bl->font_color.d.comp.alpha < 10) {
-
-                                                               bl->visible = FALSE;
-                                                               msg_debug_html ("tag is not visible: font size: "
-                                                                                               "%d, alpha: %d",
-                                                                               (int)bl->font_size,
-                                                                               (int)bl->font_color.d.comp.alpha);
-                                                       }
-
-                                                       if (!bl->visible) {
-                                                               state = content_ignore;
-                                                       }
-                                               }
-                                       }
-                               }
-                       }
-                       else {
-                               state = content_write;
-                       }
-
-
-                       p++;
-                       c = p;
-                       cur_tag = NULL;
-                       break;
-               }
-       }
-
-       if (hc->html_tags) {
-               g_node_traverse (hc->html_tags, G_POST_ORDER, G_TRAVERSE_ALL, -1,
-                               rspamd_html_propagate_lengths, NULL);
-       }
-
-       g_queue_free (styles_blocks);
-       hc->parsed = dest;
-
-       return dest;
-}
-
-GByteArray*
-rspamd_html_process_part (rspamd_mempool_t *pool,
-               struct html_content *hc,
-               GByteArray *in)
-{
-       /*
-        * Convenience entry point: forwards to the full parser, passing NULL
-        * for its three optional pointer arguments and FALSE for the trailing
-        * flag.
-        */
-       GByteArray *parsed;
-
-       parsed = rspamd_html_process_part_full (pool, hc, in,
-                       NULL, NULL, NULL, FALSE);
-
-       return parsed;
-}
diff --git a/src/libserver/html/html.cxx b/src/libserver/html/html.cxx
new file mode 100644 (file)
index 0000000..d1f2da4
--- /dev/null
@@ -0,0 +1,2931 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "config.h"
+#include "util.h"
+#include "rspamd.h"
+#include "message.h"
+#include "html.h"
+#include "html_tags.h"
+#include "html_colors.h"
+
+#include "url.h"
+#include "contrib/libucl/khash.h"
+#include "libmime/images.h"
+#include "css/css.h"
+#include "libutil/cxx/utf8_util.h"
+
+#include "html_tag_defs.hxx"
+#include "html_entities.hxx"
+
+#include <vector>
+
+#include <unicode/uversion.h>
+#include <unicode/ucnv.h>
+#if U_ICU_VERSION_MAJOR_NUM >= 46
+#include <unicode/uidna.h>
+#endif
+
+namespace rspamd::html {
+
+static const guint max_tags = 8192; /* Ignore tags if this maximum is reached */
+
+#define msg_debug_html(...)  rspamd_conditional_debug_fast (NULL, NULL, \
+        rspamd_html_log_id, "html", pool->tag.uid, \
+        G_STRFUNC, \
+        __VA_ARGS__)
+
+INIT_LOG_MODULE(html)
+
+
+[[maybe_unused]] static const html_tags_storage html_tags_defs;
+
+static struct rspamd_url *rspamd_html_process_url(rspamd_mempool_t *pool,
+                                                                                                 const gchar *start, guint len,
+                                                                                                 struct html_tag_component *comp);
+
+static gboolean
+rspamd_html_check_balance(GNode *node, GNode **cur_level)
+{
+       struct html_tag *arg = node->data, *tmp;
+       GNode *cur;
+
+       if (arg->flags & FL_CLOSING) {
+               /* First of all check whether this tag is closing tag for parent node */
+               cur = node->parent;
+               while (cur && cur->data) {
+                       tmp = cur->data;
+                       if (tmp->id == arg->id &&
+                               (tmp->flags & FL_CLOSED) == 0) {
+                               tmp->flags |= FL_CLOSED;
+                               /* Destroy current node as we find corresponding parent node */
+                               g_node_destroy(node);
+                               /* Change level */
+                               *cur_level = cur->parent;
+                               return TRUE;
+                       }
+                       cur = cur->parent;
+               }
+       }
+       else {
+               return TRUE;
+       }
+
+       return FALSE;
+}
+
+/*
+ * Looks a tag name up in the name-indexed hash.
+ * Returns the tag id, or -1 when the name is not known.
+ */
+gint
+rspamd_html_tag_by_name(const gchar *name) {
+       const khiter_t it = kh_get (tag_by_name, html_tag_by_name, name);
+
+       if (it == kh_end (html_tag_by_name)) {
+               return -1;
+       }
+
+       return kh_val (html_tag_by_name, it).id;
+}
+
+/*
+ * Tells whether a tag with the given name is marked in hc->tags_seen.
+ * Unknown tag names yield FALSE; `hc` and its tags_seen must be non-NULL.
+ */
+gboolean
+rspamd_html_tag_seen(struct html_content *hc, const gchar *tagname) {
+       g_assert (hc != NULL);
+       g_assert (hc->tags_seen != NULL);
+
+       const gint tag_id = rspamd_html_tag_by_name(tagname);
+
+       if (tag_id == -1) {
+               return FALSE;
+       }
+
+       return isset (hc->tags_seen, tag_id);
+}
+
+/*
+ * Looks a tag id up in the id-indexed hash.
+ * Returns the tag name, or NULL when the id is not known.
+ */
+const gchar *
+rspamd_html_tag_by_id(gint id) {
+       const khiter_t it = kh_get (tag_by_id, html_tag_by_id, id);
+
+       if (it == kh_end (html_tag_by_id)) {
+               return NULL;
+       }
+
+       return kh_val (html_tag_by_id, it).name;
+}
+
+/*
+ * Decode HTML entities in text, in place.
+ *
+ * s   - buffer to decode
+ * len - number of bytes in `s`
+ *
+ * Returns the length of the text after decoding.
+ *
+ * The body used to be empty, so control fell off the end of a non-void
+ * function (undefined behaviour) and callers received garbage as a length.
+ * Until the decoder is wired in, the buffer is left untouched and reported
+ * with its original length, which is a valid (if undecoded) result.
+ *
+ * NOTE(review): presumably this should delegate to the implementation moved
+ * to html_entities.cxx - confirm the exported name and call it here.
+ */
+guint
+rspamd_html_decode_entitles_inplace(gchar *s, gsize len) {
+       (void) s;
+
+       return static_cast<guint>(len);
+}
+
+static gboolean
+rspamd_url_is_subdomain(rspamd_ftok_t *t1, rspamd_ftok_t *t2) {
+       const gchar *p1, *p2;
+
+       p1 = t1->begin + t1->len - 1;
+       p2 = t2->begin + t2->len - 1;
+
+       /* Skip trailing dots */
+       while (p1 > t1->begin) {
+               if (*p1 != '.') {
+                       break;
+               }
+
+               p1--;
+       }
+
+       while (p2 > t2->begin) {
+               if (*p2 != '.') {
+                       break;
+               }
+
+               p2--;
+       }
+
+       while (p1 > t1->begin && p2 > t2->begin) {
+               if (*p1 != *p2) {
+                       break;
+               }
+
+               p1--;
+               p2--;
+       }
+
+       if (p2 == t2->begin) {
+               /* p2 can be subdomain of p1 if *p1 is '.' */
+               if (p1 != t1->begin && *(p1 - 1) == '.') {
+                       return TRUE;
+               }
+       }
+       else if (p1 == t1->begin) {
+               if (p2 != t2->begin && *(p2 - 1) == '.') {
+                       return TRUE;
+               }
+       }
+
+       return FALSE;
+}
+
+static void
+rspamd_html_url_is_phished(rspamd_mempool_t *pool,
+                                                  struct rspamd_url *href_url,
+                                                  const guchar *url_text,
+                                                  gsize len,
+                                                  gboolean *url_found,
+                                                  struct rspamd_url **ptext_url) {
+       struct rspamd_url *text_url;
+       rspamd_ftok_t disp_tok, href_tok;
+       gint rc;
+       goffset url_pos;
+       gchar *url_str = NULL, *idn_hbuf;
+       const guchar *end = url_text + len, *p;
+#if U_ICU_VERSION_MAJOR_NUM >= 46
+       static UIDNA *udn;
+       UErrorCode uc_err = U_ZERO_ERROR;
+       UIDNAInfo uinfo = UIDNA_INFO_INITIALIZER;
+#endif
+
+       *url_found = FALSE;
+#if U_ICU_VERSION_MAJOR_NUM >= 46
+       if (udn == NULL) {
+               udn = uidna_openUTS46(UIDNA_DEFAULT, &uc_err);
+
+               if (uc_err != U_ZERO_ERROR) {
+                       msg_err_pool ("cannot init idna converter: %s", u_errorName(uc_err));
+               }
+       }
+#endif
+
+       while (url_text < end && g_ascii_isspace (*url_text)) {
+               url_text++;
+       }
+
+       if (end > url_text + 4 &&
+               rspamd_url_find(pool, url_text, end - url_text, &url_str,
+                               RSPAMD_URL_FIND_ALL,
+                               &url_pos, NULL) &&
+               url_str != NULL) {
+               if (url_pos > 0) {
+                       /*
+                        * We have some url at some offset, so we need to check what is
+                        * at the start of the text
+                        */
+                       p = url_text;
+
+                       while (p < url_text + url_pos) {
+                               if (!g_ascii_isspace (*p)) {
+                                       *url_found = FALSE;
+                                       return;
+                               }
+
+                               p++;
+                       }
+               }
+
+               text_url = rspamd_mempool_alloc0 (pool, sizeof(struct rspamd_url));
+               rc = rspamd_url_parse(text_url, url_str, strlen(url_str), pool,
+                               RSPAMD_URL_PARSE_TEXT);
+
+               if (rc == URI_ERRNO_OK) {
+                       disp_tok.len = text_url->hostlen;
+                       disp_tok.begin = rspamd_url_host_unsafe (text_url);
+#if U_ICU_VERSION_MAJOR_NUM >= 46
+                       if (rspamd_substring_search_caseless(rspamd_url_host_unsafe (text_url),
+                                       text_url->hostlen, "xn--", 4) != -1) {
+                               idn_hbuf = rspamd_mempool_alloc (pool, text_url->hostlen * 2 + 1);
+                               /* We need to convert it to the normal value first */
+                               disp_tok.len = uidna_nameToUnicodeUTF8(udn,
+                                               rspamd_url_host_unsafe (text_url), text_url->hostlen,
+                                               idn_hbuf, text_url->hostlen * 2 + 1, &uinfo, &uc_err);
+
+                               if (uc_err != U_ZERO_ERROR) {
+                                       msg_err_pool ("cannot convert to IDN: %s",
+                                                       u_errorName(uc_err));
+                                       disp_tok.len = text_url->hostlen;
+                               }
+                               else {
+                                       disp_tok.begin = idn_hbuf;
+                               }
+                       }
+#endif
+                       href_tok.len = href_url->hostlen;
+                       href_tok.begin = rspamd_url_host_unsafe (href_url);
+#if U_ICU_VERSION_MAJOR_NUM >= 46
+                       if (rspamd_substring_search_caseless(rspamd_url_host_unsafe (href_url),
+                                       href_url->hostlen, "xn--", 4) != -1) {
+                               idn_hbuf = rspamd_mempool_alloc (pool, href_url->hostlen * 2 + 1);
+                               /* We need to convert it to the normal value first */
+                               href_tok.len = uidna_nameToUnicodeUTF8(udn,
+                                               rspamd_url_host_unsafe (href_url), href_url->hostlen,
+                                               idn_hbuf, href_url->hostlen * 2 + 1, &uinfo, &uc_err);
+
+                               if (uc_err != U_ZERO_ERROR) {
+                                       msg_err_pool ("cannot convert to IDN: %s",
+                                                       u_errorName(uc_err));
+                                       href_tok.len = href_url->hostlen;
+                               }
+                               else {
+                                       href_tok.begin = idn_hbuf;
+                               }
+                       }
+#endif
+                       if (rspamd_ftok_casecmp(&disp_tok, &href_tok) != 0 &&
+                               text_url->tldlen > 0 && href_url->tldlen > 0) {
+
+                               /* Apply the same logic for TLD */
+                               disp_tok.len = text_url->tldlen;
+                               disp_tok.begin = rspamd_url_tld_unsafe (text_url);
+#if U_ICU_VERSION_MAJOR_NUM >= 46
+                               if (rspamd_substring_search_caseless(rspamd_url_tld_unsafe (text_url),
+                                               text_url->tldlen, "xn--", 4) != -1) {
+                                       idn_hbuf = rspamd_mempool_alloc (pool, text_url->tldlen * 2 + 1);
+                                       /* We need to convert it to the normal value first */
+                                       disp_tok.len = uidna_nameToUnicodeUTF8(udn,
+                                                       rspamd_url_tld_unsafe (text_url), text_url->tldlen,
+                                                       idn_hbuf, text_url->tldlen * 2 + 1, &uinfo, &uc_err);
+
+                                       if (uc_err != U_ZERO_ERROR) {
+                                               msg_err_pool ("cannot convert to IDN: %s",
+                                                               u_errorName(uc_err));
+                                               disp_tok.len = text_url->tldlen;
+                                       }
+                                       else {
+                                               disp_tok.begin = idn_hbuf;
+                                       }
+                               }
+#endif
+                               href_tok.len = href_url->tldlen;
+                               href_tok.begin = rspamd_url_tld_unsafe (href_url);
+#if U_ICU_VERSION_MAJOR_NUM >= 46
+                               if (rspamd_substring_search_caseless(rspamd_url_tld_unsafe (href_url),
+                                               href_url->tldlen, "xn--", 4) != -1) {
+                                       idn_hbuf = rspamd_mempool_alloc (pool, href_url->tldlen * 2 + 1);
+                                       /* We need to convert it to the normal value first */
+                                       href_tok.len = uidna_nameToUnicodeUTF8(udn,
+                                                       rspamd_url_tld_unsafe (href_url), href_url->tldlen,
+                                                       idn_hbuf, href_url->tldlen * 2 + 1, &uinfo, &uc_err);
+
+                                       if (uc_err != U_ZERO_ERROR) {
+                                               msg_err_pool ("cannot convert to IDN: %s",
+                                                               u_errorName(uc_err));
+                                               href_tok.len = href_url->tldlen;
+                                       }
+                                       else {
+                                               href_tok.begin = idn_hbuf;
+                                       }
+                               }
+#endif
+                               if (rspamd_ftok_casecmp(&disp_tok, &href_tok) != 0) {
+                                       /* Check if one url is a subdomain for another */
+
+                                       if (!rspamd_url_is_subdomain(&disp_tok, &href_tok)) {
+                                               href_url->flags |= RSPAMD_URL_FLAG_PHISHED;
+                                               href_url->linked_url = text_url;
+                                               text_url->flags |= RSPAMD_URL_FLAG_HTML_DISPLAYED;
+                                       }
+                               }
+                       }
+
+                       *ptext_url = text_url;
+                       *url_found = TRUE;
+               }
+               else {
+                       /*
+                        * We have found something that looks like an url but it was
+                        * not parsed correctly.
+                        * Sometimes it means an obfuscation attempt, so we have to check
+                        * what's inside of the text
+                        */
+                       gboolean obfuscation_found = FALSE;
+
+                       if (len > 4 && g_ascii_strncasecmp(url_text, "http", 4) == 0 &&
+                               rspamd_substring_search(url_text, len, "://", 3) != -1) {
+                               /* Clearly an obfuscation attempt */
+                               obfuscation_found = TRUE;
+                       }
+
+                       msg_info_pool ("extract of url '%s' failed: %s; obfuscation detected: %s",
+                                       url_str,
+                                       rspamd_url_strerror(rc),
+                                       obfuscation_found ? "yes" : "no");
+
+                       if (obfuscation_found) {
+                               href_url->flags |= RSPAMD_URL_FLAG_PHISHED | RSPAMD_URL_FLAG_OBSCURED;
+                       }
+               }
+       }
+
+}
+
+static gboolean
+rspamd_html_process_tag(rspamd_mempool_t *pool, struct html_content *hc,
+                                               struct html_tag *tag, GNode **cur_level, gboolean *balanced) {
+       GNode *nnode;
+       struct html_tag *parent;
+
+       if (hc->html_tags == NULL) {
+               nnode = g_node_new(NULL);
+               *cur_level = nnode;
+               hc->html_tags = nnode;
+               rspamd_mempool_add_destructor (pool,
+                               (rspamd_mempool_destruct_t) g_node_destroy,
+                               nnode);
+       }
+
+       if (hc->total_tags > max_tags) {
+               hc->flags |= RSPAMD_HTML_FLAG_TOO_MANY_TAGS;
+       }
+
+       if (tag->id == -1) {
+               /* Ignore unknown tags */
+               hc->total_tags++;
+               return FALSE;
+       }
+
+       tag->parent = *cur_level;
+
+       if (!(tag->flags & (CM_INLINE | CM_EMPTY))) {
+               /* Block tag */
+               if (tag->flags & (FL_CLOSING | FL_CLOSED)) {
+                       if (!*cur_level) {
+                               msg_debug_html ("bad parent node");
+                               return FALSE;
+                       }
+
+                       if (hc->total_tags < max_tags) {
+                               nnode = g_node_new(tag);
+                               g_node_append (*cur_level, nnode);
+
+                               if (!rspamd_html_check_balance(nnode, cur_level)) {
+                                       msg_debug_html (
+                                                       "mark part as unbalanced as it has not pairable closing tags");
+                                       hc->flags |= RSPAMD_HTML_FLAG_UNBALANCED;
+                                       *balanced = FALSE;
+                               }
+                               else {
+                                       *balanced = TRUE;
+                               }
+
+                               hc->total_tags++;
+                       }
+               }
+               else {
+                       parent = (*cur_level)->data;
+
+                       if (parent) {
+                               if ((parent->flags & FL_IGNORE)) {
+                                       tag->flags |= FL_IGNORE;
+                               }
+
+                               if (!(tag->flags & FL_CLOSED) &&
+                                       !(parent->flags & FL_BLOCK)) {
+                                       /* We likely have some bad nesting */
+                                       if (parent->id == tag->id) {
+                                               /* Something like <a>bla<a>foo... */
+                                               hc->flags |= RSPAMD_HTML_FLAG_UNBALANCED;
+                                               *balanced = FALSE;
+                                               tag->parent = parent->parent;
+
+                                               if (hc->total_tags < max_tags) {
+                                                       nnode = g_node_new(tag);
+                                                       g_node_append (parent->parent, nnode);
+                                                       *cur_level = nnode;
+                                                       hc->total_tags++;
+                                               }
+
+                                               return TRUE;
+                                       }
+                               }
+                       }
+
+                       if (hc->total_tags < max_tags) {
+                               nnode = g_node_new(tag);
+                               g_node_append (*cur_level, nnode);
+
+                               if ((tag->flags & FL_CLOSED) == 0) {
+                                       *cur_level = nnode;
+                               }
+
+                               hc->total_tags++;
+                       }
+
+                       if (tag->flags & (CM_HEAD | CM_UNKNOWN | FL_IGNORE)) {
+                               tag->flags |= FL_IGNORE;
+
+                               return FALSE;
+                       }
+
+               }
+       }
+       else {
+               /* Inline tag */
+               parent = (*cur_level)->data;
+
+               if (parent) {
+                       if (hc->total_tags < max_tags) {
+                               nnode = g_node_new(tag);
+                               g_node_append (*cur_level, nnode);
+
+                               hc->total_tags++;
+                       }
+                       if ((parent->flags & (CM_HEAD | CM_UNKNOWN | FL_IGNORE))) {
+                               tag->flags |= FL_IGNORE;
+
+                               return FALSE;
+                       }
+               }
+       }
+
+       return TRUE;
+}
+
+#define NEW_COMPONENT(comp_type) do {                            \
+    comp = rspamd_mempool_alloc (pool, sizeof (*comp));            \
+    comp->type = (comp_type);                                    \
+    comp->start = NULL;                                            \
+    comp->len = 0;                                                \
+    g_queue_push_tail (tag->params, comp);                        \
+    ret = TRUE;                                                    \
+} while(0)
+
+/*
+ * Returns TRUE when the attribute name `name` of `len` bytes is equal
+ * (ASCII case-insensitively) to the NUL-terminated literal `lit`.
+ * The length is always compared first, so a mere prefix of `lit` or an empty
+ * name never matches.
+ */
+static inline gboolean
+rspamd_html_attr_name_is(const gchar *name, gint len, const gchar *lit) {
+       return len > 0 && (gsize) len == strlen(lit) &&
+                  g_ascii_strncasecmp(name, lit, len) == 0;
+}
+
+/*
+ * Recognises the attribute name stored in [begin, end) and, if the attribute
+ * is one we track, appends an empty component of the matching type to
+ * tag->params (the value is filled in later by the tag content parser).
+ *
+ * The name is copied to `pool` memory and HTML entities are decoded in that
+ * copy before comparison, so the input buffer is never modified.
+ *
+ * Tracked for every tag:  src/href/action (all stored as HREF), rel, alt.
+ * Tracked for IMG:        width, height, style.
+ * Tracked for FONT:       color, style, class, bgcolor, size.
+ * Tracked for FL_BLOCK:   color, style, class, bgcolor.
+ *
+ * @param pool  memory pool used for the decoded copy and for the component
+ * @param begin first byte of the attribute name
+ * @param end   one past the last byte of the attribute name
+ * @param tag   tag that owns the attribute
+ * @return TRUE if a component was added, FALSE if the range is empty or the
+ *         attribute is not tracked
+ */
+static gboolean
+rspamd_html_parse_tag_component(rspamd_mempool_t *pool,
+                                                               const guchar *begin, const guchar *end,
+                                                               struct html_tag *tag) {
+       struct html_tag_component *comp;
+       gint len;
+       gboolean ret = FALSE;
+       gchar *p;
+
+       if (end <= begin) {
+               return FALSE;
+       }
+
+       p = rspamd_mempool_alloc (pool, end - begin);
+       memcpy(p, begin, end - begin);
+       len = rspamd_html_decode_entitles_inplace(p, end - begin);
+
+       /* Attributes that are tracked regardless of the tag */
+       if (rspamd_html_attr_name_is(p, len, "src") ||
+               rspamd_html_attr_name_is(p, len, "href") ||
+               rspamd_html_attr_name_is(p, len, "action")) {
+               NEW_COMPONENT (RSPAMD_HTML_COMPONENT_HREF);
+       }
+       else if (rspamd_html_attr_name_is(p, len, "rel")) {
+               NEW_COMPONENT (RSPAMD_HTML_COMPONENT_REL);
+       }
+       else if (rspamd_html_attr_name_is(p, len, "alt")) {
+               NEW_COMPONENT (RSPAMD_HTML_COMPONENT_ALT);
+       }
+
+       /* Tag specific attributes (none of them overlaps with the names above) */
+       if (tag->id == Tag_IMG) {
+               /* Check width and height if presented */
+               if (rspamd_html_attr_name_is(p, len, "width")) {
+                       NEW_COMPONENT (RSPAMD_HTML_COMPONENT_WIDTH);
+               }
+               else if (rspamd_html_attr_name_is(p, len, "height")) {
+                       NEW_COMPONENT (RSPAMD_HTML_COMPONENT_HEIGHT);
+               }
+               else if (rspamd_html_attr_name_is(p, len, "style")) {
+                       /*
+                        * The length is checked here as well: previously any prefix
+                        * of "style" (or an empty name) was accepted for IMG tags.
+                        */
+                       NEW_COMPONENT (RSPAMD_HTML_COMPONENT_STYLE);
+               }
+       }
+       else if (tag->id == Tag_FONT || (tag->flags & FL_BLOCK)) {
+               if (rspamd_html_attr_name_is(p, len, "color")) {
+                       NEW_COMPONENT (RSPAMD_HTML_COMPONENT_COLOR);
+               }
+               else if (rspamd_html_attr_name_is(p, len, "style")) {
+                       NEW_COMPONENT (RSPAMD_HTML_COMPONENT_STYLE);
+               }
+               else if (rspamd_html_attr_name_is(p, len, "class")) {
+                       NEW_COMPONENT (RSPAMD_HTML_COMPONENT_CLASS);
+               }
+               else if (rspamd_html_attr_name_is(p, len, "bgcolor")) {
+                       NEW_COMPONENT (RSPAMD_HTML_COMPONENT_BGCOLOR);
+               }
+               else if (tag->id == Tag_FONT &&
+                                rspamd_html_attr_name_is(p, len, "size")) {
+                       /* `size` is only meaningful for FONT, not for generic blocks */
+                       NEW_COMPONENT (RSPAMD_HTML_COMPONENT_SIZE);
+               }
+       }
+
+       return ret;
+}
+
+/*
+ * Finishes the attribute value that spans [*savep, value_end).
+ * The raw bytes are copied to `pool`, HTML entities are decoded in the copy
+ * and the result is stored in the last component of tag->params (the one
+ * created when the attribute name was recognised). *savep is reset to NULL
+ * afterwards. Does nothing when *savep is NULL, i.e. when the current
+ * attribute is not tracked or its value has already been stored.
+ */
+static inline void
+rspamd_html_store_attr_value(rspamd_mempool_t *pool, struct html_tag *tag,
+                                                        const guchar *value_end, guchar const **savep) {
+       struct html_tag_component *comp;
+       gchar *s;
+
+       if (*savep == NULL) {
+               return;
+       }
+
+       g_assert (tag->params != NULL);
+       comp = g_queue_peek_tail(tag->params);
+       g_assert (comp != NULL);
+       comp->len = value_end - *savep;
+       s = rspamd_mempool_alloc (pool, comp->len);
+       memcpy(s, *savep, comp->len);
+       comp->len = rspamd_html_decode_entitles_inplace(s, comp->len);
+       comp->start = s;
+       *savep = NULL;
+}
+
+/*
+ * Feeds one input character into the automaton that parses the inside of a
+ * tag (`<name attr=value ...`). The function is called once per character;
+ * the automaton state lives in the caller between the calls.
+ *
+ * @param pool   memory pool for the decoded tag name and attribute values
+ * @param hc     html content; its flags receive RSPAMD_HTML_FLAG_BAD_ELEMENTS
+ *               and RSPAMD_HTML_FLAG_UNKNOWN_ELEMENTS
+ * @param tag    tag being parsed; name, id, flags and params are filled here
+ * @param in     pointer to the current character
+ * @param statep in/out: current automaton state (parse_start == 0 initially)
+ * @param savep  in/out: start of the attribute name or value that is being
+ *               collected; NULL when the current attribute is ignored
+ */
+static inline void
+rspamd_html_parse_tag_content(rspamd_mempool_t *pool,
+                                                         struct html_content *hc, struct html_tag *tag, const guchar *in,
+                                                         gint *statep, guchar const **savep) {
+       enum {
+               parse_start = 0,
+               parse_name,
+               parse_attr_name,
+               parse_equal,
+               parse_start_dquote,
+               parse_dqvalue,
+               parse_end_dquote,
+               parse_start_squote,
+               parse_sqvalue,
+               parse_end_squote,
+               parse_value,
+               spaces_after_name,
+               spaces_before_eq,
+               spaces_after_eq,
+               spaces_after_param,
+               ignore_bad_tag
+       } state;
+       struct html_tag_def *found;
+
+       state = *statep;
+
+       switch (state) {
+       case parse_start:
+               /* Leading spaces are skipped, anything but a letter is a bad tag */
+               if (!g_ascii_isalpha (*in) && !g_ascii_isspace (*in)) {
+                       hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS;
+                       state = ignore_bad_tag;
+                       tag->id = -1;
+                       tag->flags |= FL_BROKEN;
+               }
+               else if (g_ascii_isalpha (*in)) {
+                       state = parse_name;
+                       tag->name.start = in;
+               }
+               break;
+
+       case parse_name:
+               if (g_ascii_isspace (*in) || *in == '>' || *in == '/') {
+                       g_assert (in >= tag->name.start);
+
+                       if (*in == '/') {
+                               tag->flags |= FL_CLOSED;
+                       }
+
+                       tag->name.len = in - tag->name.start;
+
+                       if (tag->name.len == 0) {
+                               hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS;
+                               tag->id = -1;
+                               tag->flags |= FL_BROKEN;
+                               state = ignore_bad_tag;
+                       }
+                       else {
+                               gchar *s;
+                               khiter_t k;
+                               /* We CANNOT safely modify tag's name here, as it is already parsed */
+
+                               /* Work on a NUL-terminated, entity-decoded, lowercased copy */
+                               s = rspamd_mempool_alloc (pool, tag->name.len + 1);
+                               memcpy(s, tag->name.start, tag->name.len);
+                               tag->name.len = rspamd_html_decode_entitles_inplace(s,
+                                               tag->name.len);
+                               tag->name.start = s;
+                               tag->name.len = rspamd_str_lc_utf8(s, tag->name.len);
+                               s[tag->name.len] = '\0';
+
+                               k = kh_get (tag_by_name, html_tag_by_name, s);
+
+                               if (k == kh_end (html_tag_by_name)) {
+                                       hc->flags |= RSPAMD_HTML_FLAG_UNKNOWN_ELEMENTS;
+                                       tag->id = -1;
+                               }
+                               else {
+                                       found = &kh_val (html_tag_by_name, k);
+                                       tag->id = found->id;
+                                       /*
+                                        * NOTE(review): this assignment replaces the flags, so
+                                        * FL_CLOSED set above for a '/' that directly follows
+                                        * the name is overwritten - confirm that it is intended.
+                                        */
+                                       tag->flags = found->flags;
+                               }
+
+                               state = spaces_after_name;
+                       }
+               }
+               break;
+
+       case parse_attr_name:
+               if (*savep == NULL) {
+                       state = ignore_bad_tag;
+               }
+               else {
+                       const guchar *attr_name_end = in;
+
+                       if (*in == '=') {
+                               state = parse_equal;
+                       }
+                       else if (*in == '"') {
+                               /* No equal or something sane but we have quote character */
+                               state = parse_start_dquote;
+                               attr_name_end = in - 1;
+
+                               /* Step back over trailing non-alphanumeric characters */
+                               while (attr_name_end > *savep) {
+                                       if (!g_ascii_isalnum (*attr_name_end)) {
+                                               attr_name_end--;
+                                       }
+                                       else {
+                                               break;
+                                       }
+                               }
+
+                               /* One character forward to obtain length */
+                               attr_name_end++;
+                       }
+                       else if (g_ascii_isspace (*in)) {
+                               state = spaces_before_eq;
+                       }
+                       else if (*in == '/') {
+                               tag->flags |= FL_CLOSED;
+                       }
+                       else if (!g_ascii_isgraph (*in)) {
+                               state = parse_value;
+                               attr_name_end = in - 1;
+
+                               /* Step back over trailing non-alphanumeric characters */
+                               while (attr_name_end > *savep) {
+                                       if (!g_ascii_isalnum (*attr_name_end)) {
+                                               attr_name_end--;
+                                       }
+                                       else {
+                                               break;
+                                       }
+                               }
+
+                               /* One character forward to obtain length */
+                               attr_name_end++;
+                       }
+                       else {
+                               /* Still inside of the attribute name */
+                               return;
+                       }
+
+                       if (!rspamd_html_parse_tag_component(pool, *savep, attr_name_end, tag)) {
+                               /* Ignore unknown params */
+                               *savep = NULL;
+                       }
+                       else if (state == parse_value) {
+                               *savep = in + 1;
+                       }
+               }
+
+               break;
+
+       case spaces_after_name:
+               if (!g_ascii_isspace (*in)) {
+                       *savep = in;
+                       if (*in == '/') {
+                               tag->flags |= FL_CLOSED;
+                       }
+                       else if (*in != '>') {
+                               state = parse_attr_name;
+                       }
+               }
+               break;
+
+       case spaces_before_eq:
+               if (*in == '=') {
+                       state = parse_equal;
+               }
+               else if (!g_ascii_isspace (*in)) {
+                       /*
+                        * HTML defines that crap could still be restored and
+                        * calculated somehow... So we have to follow this stupid behaviour
+                        */
+                       /*
+                        * TODO: estimate what insane things do email clients in each case
+                        */
+                       if (*in == '>') {
+                               /*
+                                * Attribute name followed by end of tag
+                                * Should be okay (empty attribute). The rest is handled outside
+                                * this automata.
+                                */
+
+                       }
+                       else if (*in == '"' || *in == '\'') {
+                               /* Attribute followed by quote... Missing '=' ? Dunno, need to test */
+                               hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS;
+                               tag->flags |= FL_BROKEN;
+                               state = ignore_bad_tag;
+                       }
+                       else {
+                               /*
+                                * Just start another attribute ignoring an empty attributes for
+                                * now. We don't use them in fact...
+                                */
+                               state = parse_attr_name;
+                               *savep = in;
+                       }
+               }
+               break;
+
+       case spaces_after_eq:
+               if (*in == '"') {
+                       state = parse_start_dquote;
+               }
+               else if (*in == '\'') {
+                       state = parse_start_squote;
+               }
+               else if (!g_ascii_isspace (*in)) {
+                       if (*savep != NULL) {
+                               /* We need to save this param */
+                               *savep = in;
+                       }
+                       state = parse_value;
+               }
+               break;
+
+       case parse_equal:
+               if (g_ascii_isspace (*in)) {
+                       state = spaces_after_eq;
+               }
+               else if (*in == '"') {
+                       state = parse_start_dquote;
+               }
+               else if (*in == '\'') {
+                       state = parse_start_squote;
+               }
+               else {
+                       if (*savep != NULL) {
+                               /* We need to save this param */
+                               *savep = in;
+                       }
+                       state = parse_value;
+               }
+               break;
+
+       case parse_start_dquote:
+               if (*in == '"') {
+                       if (*savep != NULL) {
+                               /*
+                                * We have an empty attribute value: forget the saved
+                                * position (the old code cleared the local pointer only)
+                                */
+                               *savep = NULL;
+                       }
+                       state = spaces_after_param;
+               }
+               else {
+                       if (*savep != NULL) {
+                               /* We need to save this param */
+                               *savep = in;
+                       }
+                       state = parse_dqvalue;
+               }
+               break;
+
+       case parse_start_squote:
+               if (*in == '\'') {
+                       if (*savep != NULL) {
+                               /*
+                                * We have an empty attribute value: forget the saved
+                                * position (the old code cleared the local pointer only)
+                                */
+                               *savep = NULL;
+                       }
+                       state = spaces_after_param;
+               }
+               else {
+                       if (*savep != NULL) {
+                               /* We need to save this param */
+                               *savep = in;
+                       }
+                       state = parse_sqvalue;
+               }
+               break;
+
+       case parse_dqvalue:
+               if (*in == '"') {
+                       /* Closing quote: the value is [*savep, in) */
+                       rspamd_html_store_attr_value(pool, tag, in, savep);
+                       state = parse_end_dquote;
+               }
+               break;
+
+       case parse_sqvalue:
+               if (*in == '\'') {
+                       /* Closing quote: the value is [*savep, in) */
+                       rspamd_html_store_attr_value(pool, tag, in, savep);
+                       state = parse_end_squote;
+               }
+               break;
+
+       case parse_value:
+               /*
+                * NOTE(review): `*(in + 1)` here and below reads one byte past the
+                * current character; this presumably relies on the caller's buffer
+                * having a readable byte after `in` - confirm against the caller.
+                */
+               if (*in == '/' && *(in + 1) == '>') {
+                       /* Unquoted value terminated by `/>`, the state is not changed */
+                       tag->flags |= FL_CLOSED;
+                       rspamd_html_store_attr_value(pool, tag, in, savep);
+               }
+               else if (g_ascii_isspace (*in) || *in == '>' || *in == '"') {
+                       rspamd_html_store_attr_value(pool, tag, in, savep);
+                       state = spaces_after_param;
+               }
+               break;
+
+       case parse_end_dquote:
+       case parse_end_squote:
+               if (g_ascii_isspace (*in)) {
+                       state = spaces_after_param;
+               }
+               else if (*in == '/' && *(in + 1) == '>') {
+                       tag->flags |= FL_CLOSED;
+               }
+               else {
+                       /* No space, proceed immediately to the attribute name */
+                       state = parse_attr_name;
+                       *savep = in;
+               }
+               break;
+
+       case spaces_after_param:
+               if (!g_ascii_isspace (*in)) {
+                       if (*in == '/' && *(in + 1) == '>') {
+                               tag->flags |= FL_CLOSED;
+                       }
+
+                       state = parse_attr_name;
+                       *savep = in;
+               }
+               break;
+
+       case ignore_bad_tag:
+               /* Swallow everything until the caller finishes the tag */
+               break;
+       }
+
+       *statep = state;
+}
+
+
+struct rspamd_url *
+rspamd_html_process_url(rspamd_mempool_t *pool, const gchar *start, guint len,
+                                               struct html_tag_component *comp) {
+       struct rspamd_url *url;
+       guint saved_flags = 0;
+       gchar *decoded;
+       gint rc;
+       gsize decoded_len;
+       const gchar *p, *s, *prefix = "http://";
+       gchar *d;
+       guint i;
+       gsize dlen;
+       gboolean has_bad_chars = FALSE, no_prefix = FALSE;
+       static const gchar hexdigests[16] = "0123456789abcdef";
+
+       p = start;
+
+       /* Strip spaces from the url */
+       /* Head spaces */
+       while (p < start + len && g_ascii_isspace (*p)) {
+               p++;
+               start++;
+               len--;
+       }
+
+       if (comp) {
+               comp->start = p;
+               comp->len = len;
+       }
+
+       /* Trailing spaces */
+       p = start + len - 1;
+
+       while (p >= start && g_ascii_isspace (*p)) {
+               p--;
+               len--;
+
+               if (comp) {
+                       comp->len--;
+               }
+       }
+
+       s = start;
+       dlen = 0;
+
+       for (i = 0; i < len; i++) {
+               if (G_UNLIKELY (((guint) s[i]) < 0x80 && !g_ascii_isgraph(s[i]))) {
+                       dlen += 3;
+               }
+               else {
+                       dlen++;
+               }
+       }
+
+       if (rspamd_substring_search(start, len, "://", 3) == -1) {
+               if (len >= sizeof("mailto:") &&
+                       (memcmp(start, "mailto:", sizeof("mailto:") - 1) == 0 ||
+                        memcmp(start, "tel:", sizeof("tel:") - 1) == 0 ||
+                        memcmp(start, "callto:", sizeof("callto:") - 1) == 0)) {
+                       /* Exclusion, has valid but 'strange' prefix */
+               }
+               else {
+                       for (i = 0; i < len; i++) {
+                               if (!((s[i] & 0x80) || g_ascii_isalnum (s[i]))) {
+                                       if (i == 0 && len > 2 && s[i] == '/' && s[i + 1] == '/') {
+                                               prefix = "http:";
+                                               dlen += sizeof("http:") - 1;
+                                               no_prefix = TRUE;
+                                       }
+                                       else if (s[i] == '@') {
+                                               /* Likely email prefix */
+                                               prefix = "mailto://";
+                                               dlen += sizeof("mailto://") - 1;
+                                               no_prefix = TRUE;
+                                       }
+                                       else if (s[i] == ':' && i != 0) {
+                                               /* Special case */
+                                               no_prefix = FALSE;
+                                       }
+                                       else {
+                                               if (i == 0) {
+                                                       /* No valid data */
+                                                       return NULL;
+                                               }
+                                               else {
+                                                       no_prefix = TRUE;
+                                                       dlen += strlen(prefix);
+                                               }
+                                       }
+
+                                       break;
+                               }
+                       }
+               }
+       }
+
+       decoded = rspamd_mempool_alloc (pool, dlen + 1);
+       d = decoded;
+
+       if (no_prefix) {
+               gsize plen = strlen(prefix);
+               memcpy(d, prefix, plen);
+               d += plen;
+       }
+
+       /*
+        * We also need to remove all internal newlines, spaces
+        * and encode unsafe characters
+        */
+       for (i = 0; i < len; i++) {
+               if (G_UNLIKELY (g_ascii_isspace(s[i]))) {
+                       continue;
+               }
+               else if (G_UNLIKELY (((guint) s[i]) < 0x80 && !g_ascii_isgraph(s[i]))) {
+                       /* URL encode */
+                       *d++ = '%';
+                       *d++ = hexdigests[(s[i] >> 4) & 0xf];
+                       *d++ = hexdigests[s[i] & 0xf];
+                       has_bad_chars = TRUE;
+               }
+               else {
+                       *d++ = s[i];
+               }
+       }
+
+       *d = '\0';
+       dlen = d - decoded;
+
+       url = rspamd_mempool_alloc0 (pool, sizeof(*url));
+
+       rspamd_url_normalise_propagate_flags (pool, decoded, &dlen, saved_flags);
+
+       rc = rspamd_url_parse(url, decoded, dlen, pool, RSPAMD_URL_PARSE_HREF);
+
+       /* Filter some completely damaged urls */
+       if (rc == URI_ERRNO_OK && url->hostlen > 0 &&
+               !((url->protocol & PROTOCOL_UNKNOWN))) {
+               url->flags |= saved_flags;
+
+               if (has_bad_chars) {
+                       url->flags |= RSPAMD_URL_FLAG_OBSCURED;
+               }
+
+               if (no_prefix) {
+                       url->flags |= RSPAMD_URL_FLAG_SCHEMALESS;
+
+                       if (url->tldlen == 0 || (url->flags & RSPAMD_URL_FLAG_NO_TLD)) {
+                               /* Ignore urls with both no schema and no tld */
+                               return NULL;
+                       }
+               }
+
+               decoded = url->string;
+               decoded_len = url->urllen;
+
+               if (comp) {
+                       comp->start = decoded;
+                       comp->len = decoded_len;
+               }
+               /* Spaces in href usually mean an attempt to obfuscate URL */
+               /* See https://github.com/vstakhov/rspamd/issues/593 */
+#if 0
+               if (has_spaces) {
+                       url->flags |= RSPAMD_URL_FLAG_OBSCURED;
+               }
+#endif
+
+               return url;
+       }
+
+       return NULL;
+}
+
+/*
+ * Extracts the url from the first non-empty HREF component of `tag`
+ * (src/href/action attributes are all stored as HREF components).
+ *
+ * When the document has a base url (hc->base_url) and the value is longer
+ * than two characters, references are resolved first:
+ *  - "/path"      is resolved against the protocol and host of the base url;
+ *  - "//host/..." is left untouched, rspamd_html_process_url adds the schema;
+ *  - a value without "://" is appended to the whole base url, unless it is
+ *    a "data:" url, which is never treated as a url (NULL is returned);
+ *  - anything else is an absolute url and is used as is.
+ *
+ * @param pool memory pool for temporary strings and the url
+ * @param tag  tag whose params are inspected; tag->extra is set to the url
+ *             if it was empty
+ * @param hc   html content providing the optional base url, may be NULL
+ * @return parsed url or NULL if the tag has no usable url
+ */
+static struct rspamd_url *
+rspamd_html_process_url_tag(rspamd_mempool_t *pool, struct html_tag *tag,
+                                                       struct html_content *hc) {
+       struct html_tag_component *comp;
+       GList *cur;
+       struct rspamd_url *url;
+       const gchar *start;
+       gsize len;
+
+       cur = tag->params->head;
+
+       while (cur) {
+               comp = cur->data;
+
+               if (comp->type == RSPAMD_HTML_COMPONENT_HREF && comp->len > 0) {
+                       start = comp->start;
+                       len = comp->len;
+
+                       /* Check base url */
+                       if (hc && hc->base_url && comp->len > 2) {
+                               /*
+                                * Relative url cannot start from the following:
+                                * schema://
+                                * data:
+                                * slash
+                                */
+                               gchar *buf;
+                               gsize orig_len = len;
+
+                               if (start[0] == '/') {
+                                       /*
+                                        * Leading slash forms are checked first: otherwise they
+                                        * would be glued to the full base url, which hides the
+                                        * real host of a protocol-relative "//host/path" url.
+                                        */
+                                       if (start[1] != '/') {
+                                               /* Relative to the hostname */
+                                               len += hc->base_url->hostlen + hc->base_url->protocollen +
+                                                          3 /* for :// */;
+                                               buf = rspamd_mempool_alloc (pool, len + 1);
+                                               /*
+                                                * `start` already begins with '/', so no extra
+                                                * slash is printed: it is not accounted in `len`
+                                                * and used to cut the last character of the url.
+                                                */
+                                               rspamd_snprintf(buf, len + 1, "%*s://%*s%*s",
+                                                               hc->base_url->protocollen, hc->base_url->string,
+                                                               hc->base_url->hostlen, rspamd_url_host_unsafe (hc->base_url),
+                                                               (gint) orig_len, start);
+                                               start = buf;
+                                       }
+                                       /* else: protocol-relative url, keep it as is */
+                               }
+                               else if (rspamd_substring_search(start, len, "://", 3) == -1) {
+
+                                       if (len >= sizeof("data:") &&
+                                               g_ascii_strncasecmp(start, "data:", sizeof("data:") - 1) == 0) {
+                                               /* Image data url, never insert as url */
+                                               return NULL;
+                                       }
+
+                                       /* Assume relative url */
+
+                                       gboolean need_slash = FALSE;
+
+                                       len += hc->base_url->urllen;
+
+                                       if (hc->base_url->datalen == 0) {
+                                               /* Base url has no path part: separate it explicitly */
+                                               need_slash = TRUE;
+                                               len++;
+                                       }
+
+                                       buf = rspamd_mempool_alloc (pool, len + 1);
+                                       rspamd_snprintf(buf, len + 1, "%*s%s%*s",
+                                                       hc->base_url->urllen, hc->base_url->string,
+                                                       need_slash ? "/" : "",
+                                                       (gint) orig_len, start);
+                                       start = buf;
+                               }
+                       }
+
+                       url = rspamd_html_process_url(pool, start, len, comp);
+
+                       if (url && tag->extra == NULL) {
+                               tag->extra = url;
+                       }
+
+                       /* Only the first non-empty HREF component is considered */
+                       return url;
+               }
+
+               cur = g_list_next (cur);
+       }
+
+       return NULL;
+}
+/*
+ * User data passed from rspamd_process_html_url to
+ * rspamd_html_url_query_callback while the query part of an url is scanned
+ * for nested urls.
+ */
+struct rspamd_html_url_query_cbd {
+       rspamd_mempool_t *pool; /* memory pool of the caller */
+       khash_t (rspamd_url_hash) *url_set; /* set that receives the urls found in the query */
+       struct rspamd_url *url; /* url whose query part is being scanned */
+       GPtrArray *part_urls; /* optional array for urls newly added to url_set, may be NULL */
+};
+
+/*
+ * Invoked for every url found inside the query part of another url.
+ * Mailto urls without a user part are rejected (FALSE is returned); any other
+ * url gets RSPAMD_URL_FLAG_QUERY, is added to (or counted in) the url set and,
+ * when it was added and a per-part array exists, appended to that array.
+ *
+ * `ud` must point to a struct rspamd_html_url_query_cbd; the offsets are not
+ * used.
+ */
+static gboolean
+rspamd_html_url_query_callback(struct rspamd_url *url, gsize start_offset,
+                                                          gsize end_offset, gpointer ud) {
+       struct rspamd_html_url_query_cbd *ctx = (struct rspamd_html_url_query_cbd *) ud;
+       /* Keep the `pool` name: it is set exactly as in the original code */
+       rspamd_mempool_t *pool = ctx->pool;
+
+       if (url->protocol == PROTOCOL_MAILTO && url->userlen == 0) {
+               /* Email address without a user part is of no interest */
+               return FALSE;
+       }
+
+       msg_debug_html ("found url %s in query of url"
+                                       " %*s", url->string,
+                       ctx->url->querylen, rspamd_url_query_unsafe(ctx->url));
+
+       url->flags |= RSPAMD_URL_FLAG_QUERY;
+
+       if (rspamd_url_set_add_or_increase(ctx->url_set, url, false)) {
+               if (ctx->part_urls) {
+                       g_ptr_array_add(ctx->part_urls, url);
+               }
+       }
+
+       return TRUE;
+}
+
+/*
+ * Post-processes an url extracted from an html tag: urls embedded in its
+ * query part (if any) are searched and registered first, then the url itself
+ * is appended to `part_urls` when that array is provided.
+ */
+static void
+rspamd_process_html_url(rspamd_mempool_t *pool, struct rspamd_url *url,
+                                               khash_t (rspamd_url_hash) *url_set,
+                                               GPtrArray *part_urls) {
+       if (url->querylen > 0) {
+               /* Context shared with the callback for each nested url */
+               struct rspamd_html_url_query_cbd query_ctx;
+
+               query_ctx.url = url;
+               query_ctx.pool = pool;
+               query_ctx.part_urls = part_urls;
+               query_ctx.url_set = url_set;
+
+               rspamd_url_find_multiple(pool,
+                               rspamd_url_query_unsafe (url), url->querylen,
+                               RSPAMD_URL_FIND_ALL, NULL,
+                               rspamd_html_url_query_callback, &query_ctx);
+       }
+
+       if (part_urls != NULL) {
+               g_ptr_array_add(part_urls, url);
+       }
+}
+
+/*
+ * Tries to extract an embedded image from a `data:` url stored in `src`.
+ *
+ * Here, we do very basic processing of the data:
+ * detect if we have something like: `data:image/xxx;base64,yyyzzz==`
+ * We only parse base64 encoded data.
+ * We ignore content type so far
+ *
+ * Every ';' separated parameter is examined, so urls with extra parameters
+ * before the encoding (e.g. `data:image/png;charset=utf-8;base64,...`) are
+ * handled as well. If the decoded payload is recognised as an image, it is
+ * stored in img->embedded_image; otherwise `img` is left untouched.
+ *
+ * @param pool memory pool for the decoded data
+ * @param img  image descriptor to update
+ * @param src  component holding the raw `data:` url
+ */
+static void
+rspamd_html_process_data_image(rspamd_mempool_t *pool,
+                                                          struct html_image *img,
+                                                          struct html_tag_component *src) {
+       struct rspamd_image *parsed_image;
+       const gchar *semicolon_pos = NULL, *end = src->start + src->len;
+
+       semicolon_pos = src->start;
+
+       while ((semicolon_pos = memchr(semicolon_pos, ';', end - semicolon_pos)) != NULL) {
+               if (!(end - semicolon_pos > sizeof("base64,"))) {
+                       /* Not enough room for `;base64,` plus at least one data byte */
+                       return;
+               }
+
+               if (memcmp(semicolon_pos + 1, "base64,", sizeof("base64,") - 1) == 0) {
+                       /* sizeof includes '\0', which accounts for the ';' itself */
+                       const gchar *data_pos = semicolon_pos + sizeof("base64,");
+                       gchar *decoded;
+                       gsize encoded_len = end - data_pos, decoded_len;
+                       rspamd_ftok_t inp;
+
+                       /* 3 output bytes per 4 input characters plus some slack */
+                       decoded_len = (encoded_len / 4 * 3) + 12;
+                       decoded = rspamd_mempool_alloc (pool, decoded_len);
+                       rspamd_cryptobox_base64_decode(data_pos, encoded_len,
+                                       decoded, &decoded_len);
+                       inp.begin = decoded;
+                       inp.len = decoded_len;
+
+                       parsed_image = rspamd_maybe_process_image(pool, &inp);
+
+                       if (parsed_image) {
+                               msg_debug_html ("detected %s image of size %ud x %ud in data url",
+                                               rspamd_image_type_str(parsed_image->type),
+                                               parsed_image->width, parsed_image->height);
+                               img->embedded_image = parsed_image;
+                       }
+
+                       /* The payload follows `base64,`, nothing else to look for */
+                       break;
+               }
+
+               /* Some other parameter (e.g. charset): continue with the next one */
+               semicolon_pos++;
+       }
+}
+
+static void
+rspamd_html_process_img_tag(rspamd_mempool_t *pool, struct html_tag *tag,
+                                                       struct html_content *hc, khash_t (rspamd_url_hash) *url_set,
+                                                       GPtrArray *part_urls,
+                                                       GByteArray *dest) {
+       struct html_tag_component *comp;
+       struct html_image *img;
+       rspamd_ftok_t fstr;
+       const guchar *p;
+       GList *cur;
+       gulong val;
+       gboolean seen_width = FALSE, seen_height = FALSE;
+       goffset pos;
+
+       cur = tag->params->head;
+       img = rspamd_mempool_alloc0 (pool, sizeof(*img));
+       img->tag = tag;
+       tag->flags |= FL_IMAGE;
+
+       while (cur) {
+               comp = cur->data;
+
+               if (comp->type == RSPAMD_HTML_COMPONENT_HREF && comp->len > 0) {
+                       fstr.begin = (gchar *) comp->start;
+                       fstr.len = comp->len;
+                       img->src = rspamd_mempool_ftokdup (pool, &fstr);
+
+                       if (comp->len > sizeof("cid:") - 1 && memcmp(comp->start,
+                                       "cid:", sizeof("cid:") - 1) == 0) {
+                               /* We have an embedded image */
+                               img->flags |= RSPAMD_HTML_FLAG_IMAGE_EMBEDDED;
+                       }
+                       else {
+                               if (comp->len > sizeof("data:") - 1 && memcmp(comp->start,
+                                               "data:", sizeof("data:") - 1) == 0) {
+                                       /* We have an embedded image in HTML tag */
+                                       img->flags |=
+                                                       (RSPAMD_HTML_FLAG_IMAGE_EMBEDDED | RSPAMD_HTML_FLAG_IMAGE_DATA);
+                                       rspamd_html_process_data_image(pool, img, comp);
+                                       hc->flags |= RSPAMD_HTML_FLAG_HAS_DATA_URLS;
+                               }
+                               else {
+                                       img->flags |= RSPAMD_HTML_FLAG_IMAGE_EXTERNAL;
+                                       if (img->src) {
+
+                                               img->url = rspamd_html_process_url(pool,
+                                                               img->src, fstr.len, NULL);
+
+                                               if (img->url) {
+                                                       struct rspamd_url *existing;
+
+                                                       img->url->flags |= RSPAMD_URL_FLAG_IMAGE;
+                                                       existing = rspamd_url_set_add_or_return(url_set, img->url);
+
+                                                       if (existing != img->url) {
+                                                               /*
+                                                                * We have some other URL that could be
+                                                                * found, e.g. from another part. However,
+                                                                * we still want to set an image flag on it
+                                                                */
+                                                               existing->flags |= img->url->flags;
+                                                               existing->count++;
+                                                       }
+                                                       else if (part_urls) {
+                                                               /* New url */
+                                                               g_ptr_array_add(part_urls, img->url);
+                                                       }
+                                               }
+                                       }
+                               }
+                       }
+               }
+               else if (comp->type == RSPAMD_HTML_COMPONENT_HEIGHT) {
+                       rspamd_strtoul(comp->start, comp->len, &val);
+                       img->height = val;
+                       seen_height = TRUE;
+               }
+               else if (comp->type == RSPAMD_HTML_COMPONENT_WIDTH) {
+                       rspamd_strtoul(comp->start, comp->len, &val);
+                       img->width = val;
+                       seen_width = TRUE;
+               }
+               else if (comp->type == RSPAMD_HTML_COMPONENT_STYLE) {
+                       /* Try to search for height= or width= in style tag */
+                       if (!seen_height && comp->len > 0) {
+                               pos = rspamd_substring_search_caseless(comp->start, comp->len,
+                                               "height", sizeof("height") - 1);
+
+                               if (pos != -1) {
+                                       p = comp->start + pos + sizeof("height") - 1;
+
+                                       while (p < comp->start + comp->len) {
+                                               if (g_ascii_isdigit (*p)) {
+                                                       rspamd_strtoul(p, comp->len - (p - comp->start), &val);
+                                                       img->height = val;
+                                                       break;
+                                               }
+                                               else if (!g_ascii_isspace (*p) && *p != '=' && *p != ':') {
+                                                       /* Fallback */
+                                                       break;
+                                               }
+                                               p++;
+                                       }
+                               }
+                       }
+
+                       if (!seen_width && comp->len > 0) {
+                               pos = rspamd_substring_search_caseless(comp->start, comp->len,
+                                               "width", sizeof("width") - 1);
+
+                               if (pos != -1) {
+                                       p = comp->start + pos + sizeof("width") - 1;
+
+                                       while (p < comp->start + comp->len) {
+                                               if (g_ascii_isdigit (*p)) {
+                                                       rspamd_strtoul(p, comp->len - (p - comp->start), &val);
+                                                       img->width = val;
+                                                       break;
+                                               }
+                                               else if (!g_ascii_isspace (*p) && *p != '=' && *p != ':') {
+                                                       /* Fallback */
+                                                       break;
+                                               }
+                                               p++;
+                                       }
+                               }
+                       }
+               }
+               else if (comp->type == RSPAMD_HTML_COMPONENT_ALT && comp->len > 0 && dest != NULL) {
+                       if (dest->len > 0 && !g_ascii_isspace (dest->data[dest->len - 1])) {
+                               /* Add a space */
+                               g_byte_array_append(dest, " ", 1);
+                       }
+
+                       g_byte_array_append(dest, comp->start, comp->len);
+
+                       if (!g_ascii_isspace (dest->data[dest->len - 1])) {
+                               /* Add a space */
+                               g_byte_array_append(dest, " ", 1);
+                       }
+               }
+
+               cur = g_list_next (cur);
+       }
+
+       if (hc->images == NULL) {
+               hc->images = g_ptr_array_sized_new(4);
+               rspamd_mempool_notify_alloc (pool, 4 * sizeof(gpointer) + sizeof(GPtrArray));
+               rspamd_mempool_add_destructor (pool, rspamd_ptr_array_free_hard,
+                               hc->images);
+       }
+
+       if (img->embedded_image) {
+               if (!seen_height) {
+                       img->height = img->embedded_image->height;
+               }
+               if (!seen_width) {
+                       img->width = img->embedded_image->width;
+               }
+       }
+
+       g_ptr_array_add(hc->images, img);
+       tag->extra = img;
+}
+
+static void
+rspamd_html_process_link_tag(rspamd_mempool_t *pool, struct html_tag *tag,
+                                                        struct html_content *hc, khash_t (rspamd_url_hash) *url_set,
+                                                        GPtrArray *part_urls) {
+       struct html_tag_component *comp;
+       GList *cur;
+
+       cur = tag->params->head;
+
+       while (cur) {
+               comp = cur->data;
+
+               if (comp->type == RSPAMD_HTML_COMPONENT_REL && comp->len > 0) {
+                       if (comp->len == sizeof("icon") - 1 &&
+                               rspamd_lc_cmp(comp->start, "icon", sizeof("icon") - 1) == 0) {
+
+                               rspamd_html_process_img_tag(pool, tag, hc, url_set, part_urls, NULL);
+                       }
+               }
+
+               cur = g_list_next (cur);
+       }
+}
+
+static void
+rspamd_html_process_color(const gchar *line, guint len, struct html_color *cl) {
+       const gchar *p = line, *end = line + len;
+       char hexbuf[7];
+       rspamd_ftok_t search;
+       struct html_color *el;
+
+       memset(cl, 0, sizeof(*cl));
+
+       if (*p == '#') {
+               /* HEX color */
+               p++;
+               rspamd_strlcpy(hexbuf, p, MIN ((gint) sizeof(hexbuf), end - p + 1));
+               cl->d.val = strtoul(hexbuf, NULL, 16);
+               cl->d.comp.alpha = 255;
+               cl->valid = TRUE;
+       }
+       else if (len > 4 && rspamd_lc_cmp(p, "rgb", 3) == 0) {
+               /* We have something like rgba(x,x,x,x) or rgb(x,x,x) */
+               enum {
+                       obrace,
+                       num1,
+                       num2,
+                       num3,
+                       num4,
+                       skip_spaces
+               } state = skip_spaces, next_state = obrace;
+               gulong r = 0, g = 0, b = 0, opacity = 255;
+               const gchar *c;
+               gboolean valid = FALSE;
+
+               p += 3;
+
+               if (*p == 'a') {
+                       p++;
+               }
+
+               c = p;
+
+               while (p < end) {
+                       switch (state) {
+                       case obrace:
+                               if (*p == '(') {
+                                       p++;
+                                       state = skip_spaces;
+                                       next_state = num1;
+                               }
+                               else if (g_ascii_isspace (*p)) {
+                                       state = skip_spaces;
+                                       next_state = obrace;
+                               }
+                               else {
+                                       goto stop;
+                               }
+                               break;
+                       case num1:
+                               if (*p == ',') {
+                                       if (!rspamd_strtoul(c, p - c, &r)) {
+                                               goto stop;
+                                       }
+
+                                       p++;
+                                       state = skip_spaces;
+                                       next_state = num2;
+                               }
+                               else if (!g_ascii_isdigit (*p)) {
+                                       goto stop;
+                               }
+                               else {
+                                       p++;
+                               }
+                               break;
+                       case num2:
+                               if (*p == ',') {
+                                       if (!rspamd_strtoul(c, p - c, &g)) {
+                                               goto stop;
+                                       }
+
+                                       p++;
+                                       state = skip_spaces;
+                                       next_state = num3;
+                               }
+                               else if (!g_ascii_isdigit (*p)) {
+                                       goto stop;
+                               }
+                               else {
+                                       p++;
+                               }
+                               break;
+                       case num3:
+                               if (*p == ',') {
+                                       if (!rspamd_strtoul(c, p - c, &b)) {
+                                               goto stop;
+                                       }
+
+                                       valid = TRUE;
+                                       p++;
+                                       state = skip_spaces;
+                                       next_state = num4;
+                               }
+                               else if (*p == ')') {
+                                       if (!rspamd_strtoul(c, p - c, &b)) {
+                                               goto stop;
+                                       }
+
+                                       valid = TRUE;
+                                       goto stop;
+                               }
+                               else if (!g_ascii_isdigit (*p)) {
+                                       goto stop;
+                               }
+                               else {
+                                       p++;
+                               }
+                               break;
+                       case num4:
+                               if (*p == ',') {
+                                       if (!rspamd_strtoul(c, p - c, &opacity)) {
+                                               goto stop;
+                                       }
+
+                                       valid = TRUE;
+                                       goto stop;
+                               }
+                               else if (*p == ')') {
+                                       if (!rspamd_strtoul(c, p - c, &opacity)) {
+                                               goto stop;
+                                       }
+
+                                       valid = TRUE;
+                                       goto stop;
+                               }
+                               else if (!g_ascii_isdigit (*p)) {
+                                       goto stop;
+                               }
+                               else {
+                                       p++;
+                               }
+                               break;
+                       case skip_spaces:
+                               if (!g_ascii_isspace (*p)) {
+                                       c = p;
+                                       state = next_state;
+                               }
+                               else {
+                                       p++;
+                               }
+                               break;
+                       }
+               }
+
+stop:
+
+               if (valid) {
+                       cl->d.comp.r = r;
+                       cl->d.comp.g = g;
+                       cl->d.comp.b = b;
+                       cl->d.comp.alpha = opacity;
+                       cl->valid = TRUE;
+               }
+       }
+       else {
+               khiter_t k;
+               /* Compare color by name */
+               search.begin = line;
+               search.len = len;
+
+               k = kh_get (color_by_name, html_color_by_name, &search);
+
+               if (k != kh_end (html_color_by_name)) {
+                       el = &kh_val (html_color_by_name, k);
+                       memcpy(cl, el, sizeof(*cl));
+                       cl->d.comp.alpha = 255; /* Non transparent */
+               }
+       }
+}
+
+/*
+ * Target is used for in and out if this function returns TRUE
+ */
+static gboolean
+rspamd_html_process_css_size(const gchar *suffix, gsize len,
+                                                        gdouble *tgt) {
+       gdouble sz = *tgt;
+       gboolean ret = FALSE;
+
+       if (len >= 2) {
+               if (memcmp(suffix, "px", 2) == 0) {
+                       sz = (guint) sz; /* Round to number */
+                       ret = TRUE;
+               }
+               else if (memcmp(suffix, "em", 2) == 0) {
+                       /* EM is 16 px, so multiply and round */
+                       sz = (guint) (sz * 16.0);
+                       ret = TRUE;
+               }
+               else if (len >= 3 && memcmp(suffix, "rem", 3) == 0) {
+                       /* equal to EM in our case */
+                       sz = (guint) (sz * 16.0);
+                       ret = TRUE;
+               }
+               else if (memcmp(suffix, "ex", 2) == 0) {
+                       /*
+                        * Represents the x-height of the element's font.
+                        * On fonts with the "x" letter, this is generally the height
+                        * of lowercase letters in the font; 1ex = 0.5em in many fonts.
+                        */
+                       sz = (guint) (sz * 8.0);
+                       ret = TRUE;
+               }
+               else if (memcmp(suffix, "vw", 2) == 0) {
+                       /*
+                        * Vewport width in percentages:
+                        * we assume 1% of viewport width as 8px
+                        */
+                       sz = (guint) (sz * 8.0);
+                       ret = TRUE;
+               }
+               else if (memcmp(suffix, "vh", 2) == 0) {
+                       /*
+                        * Vewport height in percentages
+                        * we assume 1% of viewport width as 6px
+                        */
+                       sz = (guint) (sz * 6.0);
+                       ret = TRUE;
+               }
+               else if (len >= 4 && memcmp(suffix, "vmax", 4) == 0) {
+                       /*
+                        * Vewport width in percentages
+                        * we assume 1% of viewport width as 6px
+                        */
+                       sz = (guint) (sz * 8.0);
+                       ret = TRUE;
+               }
+               else if (len >= 4 && memcmp(suffix, "vmin", 4) == 0) {
+                       /*
+                        * Vewport height in percentages
+                        * we assume 1% of viewport width as 6px
+                        */
+                       sz = (guint) (sz * 6.0);
+                       ret = TRUE;
+               }
+               else if (memcmp(suffix, "pt", 2) == 0) {
+                       sz = (guint) (sz * 96.0 / 72.0); /* One point. 1pt = 1/72nd of 1in */
+                       ret = TRUE;
+               }
+               else if (memcmp(suffix, "cm", 2) == 0) {
+                       sz = (guint) (sz * 96.0 / 2.54); /* 96px/2.54 */
+                       ret = TRUE;
+               }
+               else if (memcmp(suffix, "mm", 2) == 0) {
+                       sz = (guint) (sz * 9.6 / 2.54); /* 9.6px/2.54 */
+                       ret = TRUE;
+               }
+               else if (memcmp(suffix, "in", 2) == 0) {
+                       sz = (guint) (sz * 96.0); /* 96px */
+                       ret = TRUE;
+               }
+               else if (memcmp(suffix, "pc", 2) == 0) {
+                       sz = (guint) (sz * 96.0 / 6.0); /* 1pc = 12pt = 1/6th of 1in. */
+                       ret = TRUE;
+               }
+       }
+       else if (suffix[0] == '%') {
+               /* Percentages from 16 px */
+               sz = (guint) (sz / 100.0 * 16.0);
+               ret = TRUE;
+       }
+
+       if (ret) {
+               *tgt = sz;
+       }
+
+       return ret;
+}
+
+static void
+rspamd_html_process_font_size(const gchar *line, guint len, guint *fs,
+                                                         gboolean is_css) {
+       const gchar *p = line, *end = line + len;
+       gchar *err = NULL, numbuf[64];
+       gdouble sz = 0;
+       gboolean failsafe = FALSE;
+
+       while (p < end && g_ascii_isspace (*p)) {
+               p++;
+               len--;
+       }
+
+       if (g_ascii_isdigit (*p)) {
+               rspamd_strlcpy(numbuf, p, MIN (sizeof(numbuf), len + 1));
+               sz = strtod(numbuf, &err);
+
+               /* Now check leftover */
+               if (sz < 0) {
+                       sz = 0;
+               }
+       }
+       else {
+               /* Ignore the rest */
+               failsafe = TRUE;
+               sz = is_css ? 16 : 1;
+               /* TODO: add textual fonts descriptions */
+       }
+
+       if (err && *err != '\0') {
+               const gchar *e = err;
+               gsize slen;
+
+               /* Skip spaces */
+               while (*e && g_ascii_isspace (*e)) {
+                       e++;
+               }
+
+               /* Lowercase */
+               slen = strlen(e);
+               rspamd_str_lc((gchar *) e, slen);
+
+               if (!rspamd_html_process_css_size(e, slen, &sz)) {
+                       failsafe = TRUE;
+               }
+       }
+       else {
+               /* Failsafe naked number */
+               failsafe = TRUE;
+       }
+
+       if (failsafe) {
+               if (is_css) {
+                       /*
+                        * In css mode we usually ignore sizes, but let's treat
+                        * small sizes specially
+                        */
+                       if (sz < 1) {
+                               sz = 0;
+                       }
+                       else {
+                               sz = 16; /* Ignore */
+                       }
+               }
+               else {
+                       /* In non-css mode we have to check legacy size */
+                       sz = sz >= 1 ? sz * 16 : 16;
+               }
+       }
+
+       if (sz > 32) {
+               sz = 32;
+       }
+
+       *fs = sz;
+}
+
+static void
+rspamd_html_process_style(rspamd_mempool_t *pool, struct html_block *bl,
+                                                 struct html_content *hc, const gchar *style, guint len) {
+       const gchar *p, *c, *end, *key = NULL;
+       enum {
+               read_key,
+               read_colon,
+               read_value,
+               skip_spaces,
+       } state = skip_spaces, next_state = read_key;
+       guint klen = 0;
+       gdouble opacity = 1.0;
+
+       p = style;
+       c = p;
+       end = p + len;
+
+       while (p <= end) {
+               switch (state) {
+               case read_key:
+                       if (p == end || *p == ':') {
+                               key = c;
+                               klen = p - c;
+                               state = skip_spaces;
+                               next_state = read_value;
+                       }
+                       else if (g_ascii_isspace (*p)) {
+                               key = c;
+                               klen = p - c;
+                               state = skip_spaces;
+                               next_state = read_colon;
+                       }
+
+                       p++;
+                       break;
+
+               case read_colon:
+                       if (p == end || *p == ':') {
+                               state = skip_spaces;
+                               next_state = read_value;
+                       }
+
+                       p++;
+                       break;
+
+               case read_value:
+                       if (p == end || *p == ';') {
+                               if (key && klen && p - c > 0) {
+                                       if ((klen == 5 && g_ascii_strncasecmp(key, "color", 5) == 0)
+                                               || (klen == 10 && g_ascii_strncasecmp(key, "font-color", 10) == 0)) {
+
+                                               rspamd_html_process_color(c, p - c, &bl->font_color);
+                                               msg_debug_html ("got color: %xd", bl->font_color.d.val);
+                                       }
+                                       else if ((klen == 16 && g_ascii_strncasecmp(key,
+                                                       "background-color", 16) == 0) ||
+                                                        (klen == 10 && g_ascii_strncasecmp(key,
+                                                                        "background", 10) == 0)) {
+
+                                               rspamd_html_process_color(c, p - c, &bl->background_color);
+                                               msg_debug_html ("got bgcolor: %xd", bl->background_color.d.val);
+                                       }
+                                       else if (klen == 7 && g_ascii_strncasecmp(key, "display", 7) == 0) {
+                                               if (p - c >= 4 && rspamd_substring_search_caseless(c, p - c,
+                                                               "none", 4) != -1) {
+                                                       bl->visible = FALSE;
+                                                       msg_debug_html ("tag is not visible");
+                                               }
+                                       }
+                                       else if (klen == 9 &&
+                                                        g_ascii_strncasecmp(key, "font-size", 9) == 0) {
+                                               rspamd_html_process_font_size(c, p - c,
+                                                               &bl->font_size, TRUE);
+                                               msg_debug_html ("got font size: %ud", bl->font_size);
+                                       }
+                                       else if (klen == 7 &&
+                                                        g_ascii_strncasecmp(key, "opacity", 7) == 0) {
+                                               gchar numbuf[64];
+
+                                               rspamd_strlcpy(numbuf, c,
+                                                               MIN (sizeof(numbuf), p - c + 1));
+                                               opacity = strtod(numbuf, NULL);
+
+                                               if (opacity > 1) {
+                                                       opacity = 1;
+                                               }
+                                               else if (opacity < 0) {
+                                                       opacity = 0;
+                                               }
+
+                                               bl->font_color.d.comp.alpha = (guint8) (opacity * 255.0);
+                                       }
+                                       else if (klen == 10 &&
+                                                        g_ascii_strncasecmp(key, "visibility", 10) == 0) {
+                                               if (p - c >= 6 && rspamd_substring_search_caseless(c,
+                                                               p - c,
+                                                               "hidden", 6) != -1) {
+                                                       bl->visible = FALSE;
+                                                       msg_debug_html ("tag is not visible");
+                                               }
+                                       }
+                               }
+
+                               key = NULL;
+                               klen = 0;
+                               state = skip_spaces;
+                               next_state = read_key;
+                       }
+
+                       p++;
+                       break;
+
+               case skip_spaces:
+                       if (p < end && !g_ascii_isspace (*p)) {
+                               c = p;
+                               state = next_state;
+                       }
+                       else {
+                               p++;
+                       }
+
+                       break;
+               }
+       }
+}
+
+static void
+rspamd_html_process_block_tag(rspamd_mempool_t *pool, struct html_tag *tag,
+                                                         struct html_content *hc) {
+       struct html_tag_component *comp;
+       struct html_block *bl;
+       rspamd_ftok_t fstr;
+       GList *cur;
+
+       cur = tag->params->head;
+       bl = rspamd_mempool_alloc0 (pool, sizeof(*bl));
+       bl->tag = tag;
+       bl->visible = TRUE;
+       bl->font_size = (guint) -1;
+       bl->font_color.d.comp.alpha = 255;
+
+       while (cur) {
+               comp = cur->data;
+
+               if (comp->len > 0) {
+                       switch (comp->type) {
+                       case RSPAMD_HTML_COMPONENT_COLOR:
+                               fstr.begin = (gchar *) comp->start;
+                               fstr.len = comp->len;
+                               rspamd_html_process_color(comp->start, comp->len,
+                                               &bl->font_color);
+                               msg_debug_html ("tag %*s; got color: %xd",
+                                               tag->name.len, tag->name.start, bl->font_color.d.val);
+                               break;
+                       case RSPAMD_HTML_COMPONENT_BGCOLOR:
+                               fstr.begin = (gchar *) comp->start;
+                               fstr.len = comp->len;
+                               rspamd_html_process_color(comp->start, comp->len,
+                                               &bl->background_color);
+                               msg_debug_html ("tag %*s; got color: %xd",
+                                               tag->name.len, tag->name.start, bl->font_color.d.val);
+
+                               if (tag->id == Tag_BODY) {
+                                       /* Set global background color */
+                                       memcpy(&hc->bgcolor, &bl->background_color,
+                                                       sizeof(hc->bgcolor));
+                               }
+                               break;
+                       case RSPAMD_HTML_COMPONENT_STYLE:
+                               bl->style.len = comp->len;
+                               bl->style.start = comp->start;
+                               msg_debug_html ("tag: %*s; got style: %*s",
+                                               tag->name.len, tag->name.start,
+                                               (gint) bl->style.len, bl->style.start);
+                               rspamd_html_process_style(pool, bl, hc, comp->start, comp->len);
+                               break;
+                       case RSPAMD_HTML_COMPONENT_CLASS:
+                               fstr.begin = (gchar *) comp->start;
+                               fstr.len = comp->len;
+                               bl->html_class = rspamd_mempool_ftokdup (pool, &fstr);
+                               msg_debug_html ("tag: %*s; got class: %s",
+                                               tag->name.len, tag->name.start, bl->html_class);
+                               break;
+                       case RSPAMD_HTML_COMPONENT_SIZE:
+                               /* Not supported by html5 */
+                               /* FIXME maybe support it */
+                               bl->font_size = 16;
+                               msg_debug_html ("tag %*s; got size: %*s",
+                                               tag->name.len, tag->name.start,
+                                               (gint) comp->len, comp->start);
+                               break;
+                       default:
+                               /* NYI */
+                               break;
+                       }
+               }
+
+               cur = g_list_next (cur);
+       }
+
+       if (hc->blocks == NULL) {
+               hc->blocks = g_ptr_array_sized_new(64);
+               rspamd_mempool_notify_alloc (pool, 64 * sizeof(gpointer) + sizeof(GPtrArray));
+               rspamd_mempool_add_destructor (pool, rspamd_ptr_array_free_hard,
+                               hc->blocks);
+       }
+
+       g_ptr_array_add(hc->blocks, bl);
+       tag->extra = bl;
+}
+
+static void
+rspamd_html_check_displayed_url(rspamd_mempool_t *pool,
+                                                               GList **exceptions,
+                                                               khash_t (rspamd_url_hash) *url_set,
+                                                               GByteArray *dest,
+                                                               gint href_offset,
+                                                               struct rspamd_url *url) {
+       struct rspamd_url *displayed_url = NULL;
+       struct rspamd_url *turl;
+       gboolean url_found = FALSE;
+       struct rspamd_process_exception *ex;
+       guint saved_flags = 0;
+       gsize dlen;
+
+       if (href_offset < 0) {
+               /* No dispalyed url, just some text within <a> tag */
+               return;
+       }
+
+       url->visible_part = rspamd_mempool_alloc (pool, dest->len - href_offset + 1);
+       rspamd_strlcpy(url->visible_part, dest->data + href_offset,
+                       dest->len - href_offset + 1);
+       dlen = dest->len - href_offset;
+
+       /* Strip unicode spaces from the start and the end */
+       url->visible_part = rspamd_string_unicode_trim_inplace(url->visible_part,
+                       &dlen);
+       rspamd_html_url_is_phished(pool, url,
+                       url->visible_part,
+                       dlen,
+                       &url_found, &displayed_url);
+
+       if (url_found) {
+               url->flags |= saved_flags | RSPAMD_URL_FLAG_DISPLAY_URL;
+       }
+
+       if (exceptions && url_found) {
+               ex = rspamd_mempool_alloc (pool,
+                               sizeof(*ex));
+               ex->pos = href_offset;
+               ex->len = dest->len - href_offset;
+               ex->type = RSPAMD_EXCEPTION_URL;
+               ex->ptr = url;
+
+               *exceptions = g_list_prepend(*exceptions,
+                               ex);
+       }
+
+       if (displayed_url && url_set) {
+               turl = rspamd_url_set_add_or_return(url_set,
+                               displayed_url);
+
+               if (turl != NULL) {
+                       /* Here, we assume the following:
+                        * if we have a URL in the text part which
+                        * is the same as displayed URL in the
+                        * HTML part, we assume that it is also
+                        * hint only.
+                        */
+                       if (turl->flags &
+                               RSPAMD_URL_FLAG_FROM_TEXT) {
+                               turl->flags |= RSPAMD_URL_FLAG_HTML_DISPLAYED;
+                               turl->flags &= ~RSPAMD_URL_FLAG_FROM_TEXT;
+                       }
+
+                       turl->count++;
+               }
+               else {
+                       /* Already inserted by `rspamd_url_set_add_or_return` */
+               }
+       }
+
+       rspamd_normalise_unicode_inplace(url->visible_part, &dlen);
+}
+
+static gboolean
+rspamd_html_propagate_lengths(GNode *node, gpointer _unused) {
+       GNode *child;
+       struct html_tag *tag = node->data, *cld_tag;
+
+       if (tag) {
+               child = node->children;
+
+               /* Summarize content length from children */
+               while (child) {
+                       cld_tag = child->data;
+                       tag->content_length += cld_tag->content_length;
+                       child = child->next;
+               }
+       }
+
+       return FALSE;
+}
+
+static void
+rspamd_html_propagate_style(struct html_content *hc,
+                                                       struct html_tag *tag,
+                                                       struct html_block *bl,
+                                                       GQueue *blocks) {
+       struct html_block *bl_parent;
+       gboolean push_block = FALSE;
+
+
+       /* Propagate from the parent if needed */
+       bl_parent = g_queue_peek_tail(blocks);
+
+       if (bl_parent) {
+               if (!bl->background_color.valid) {
+                       /* Try to propagate background color from parent nodes */
+                       if (bl_parent->background_color.valid) {
+                               memcpy(&bl->background_color, &bl_parent->background_color,
+                                               sizeof(bl->background_color));
+                       }
+               }
+               else {
+                       push_block = TRUE;
+               }
+
+               if (!bl->font_color.valid) {
+                       /* Try to propagate background color from parent nodes */
+                       if (bl_parent->font_color.valid) {
+                               memcpy(&bl->font_color, &bl_parent->font_color,
+                                               sizeof(bl->font_color));
+                       }
+               }
+               else {
+                       push_block = TRUE;
+               }
+
+               /* Propagate font size */
+               if (bl->font_size == (guint) -1) {
+                       if (bl_parent->font_size != (guint) -1) {
+                               bl->font_size = bl_parent->font_size;
+                       }
+               }
+               else {
+                       push_block = TRUE;
+               }
+       }
+
+       /* Set bgcolor to the html bgcolor and font color to black as a last resort */
+       if (!bl->font_color.valid) {
+               /* Don't touch opacity as it can be set separately */
+               bl->font_color.d.comp.r = 0;
+               bl->font_color.d.comp.g = 0;
+               bl->font_color.d.comp.b = 0;
+               bl->font_color.valid = TRUE;
+       }
+       else {
+               push_block = TRUE;
+       }
+
+       if (!bl->background_color.valid) {
+               memcpy(&bl->background_color, &hc->bgcolor, sizeof(hc->bgcolor));
+       }
+       else {
+               push_block = TRUE;
+       }
+
+       if (bl->font_size == (guint) -1) {
+               bl->font_size = 16; /* Default for browsers */
+       }
+       else {
+               push_block = TRUE;
+       }
+
+       if (push_block && !(tag->flags & FL_CLOSED)) {
+               g_queue_push_tail(blocks, bl);
+       }
+}
+
+}
+
+GByteArray*
+rspamd_html_process_part_full (rspamd_mempool_t *pool,
+                                                          struct html_content *hc,
+                                                          GByteArray *in,
+                                                          GList **exceptions,
+                                                          khash_t (rspamd_url_hash) *url_set,
+                                                          GPtrArray *part_urls,
+                                                          bool allow_css)
+{
+       const guchar *p, *c, *end, *savep = NULL;
+       guchar t;
+       gboolean closing = FALSE, need_decode = FALSE, save_space = FALSE,
+                       balanced;
+       GByteArray *dest;
+       guint obrace = 0, ebrace = 0;
+       GNode *cur_level = NULL;
+       gint substate = 0, len, href_offset = -1;
+       struct html_tag *cur_tag = NULL, *content_tag = NULL;
+       struct rspamd_url *url = NULL;
+       GQueue *styles_blocks;
+
+       enum {
+               parse_start = 0,
+               tag_begin,
+               sgml_tag,
+               xml_tag,
+               compound_tag,
+               comment_tag,
+               comment_content,
+               sgml_content,
+               tag_content,
+               tag_end,
+               xml_tag_end,
+               content_ignore,
+               content_write,
+               content_style,
+               content_ignore_sp
+       } state = parse_start;
+
+       g_assert (in != NULL);
+       g_assert (hc != NULL);
+       g_assert (pool != NULL);
+
+       rspamd_html_library_init ();
+       hc->tags_seen = rspamd_mempool_alloc0 (pool, NBYTES (N_TAGS));
+
+       /* Set white background color by default */
+       hc->bgcolor.d.comp.alpha = 0;
+       hc->bgcolor.d.comp.r = 255;
+       hc->bgcolor.d.comp.g = 255;
+       hc->bgcolor.d.comp.b = 255;
+       hc->bgcolor.valid = TRUE;
+
+       dest = g_byte_array_sized_new (in->len / 3 * 2);
+       styles_blocks = g_queue_new ();
+
+       p = in->data;
+       c = p;
+       end = p + in->len;
+
+       while (p < end) {
+               t = *p;
+
+               switch (state) {
+               case parse_start:
+                       if (t == '<') {
+                               state = tag_begin;
+                       }
+                       else {
+                               /* We have no starting tag, so assume that it's content */
+                               hc->flags |= RSPAMD_HTML_FLAG_BAD_START;
+                               state = content_write;
+                       }
+
+                       break;
+               case tag_begin:
+                       switch (t) {
+                       case '<':
+                               p ++;
+                               closing = FALSE;
+                               break;
+                       case '!':
+                               state = sgml_tag;
+                               p ++;
+                               break;
+                       case '?':
+                               state = xml_tag;
+                               hc->flags |= RSPAMD_HTML_FLAG_XML;
+                               p ++;
+                               break;
+                       case '/':
+                               closing = TRUE;
+                               p ++;
+                               break;
+                       case '>':
+                               /* Empty tag */
+                               hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS;
+                               state = tag_end;
+                               continue;
+                       default:
+                               state = tag_content;
+                               substate = 0;
+                               savep = NULL;
+                               cur_tag = rspamd_mempool_alloc0 (pool, sizeof (*cur_tag));
+                               cur_tag->params = g_queue_new ();
+                               rspamd_mempool_add_destructor (pool,
+                                               (rspamd_mempool_destruct_t)g_queue_free, cur_tag->params);
+                               break;
+                       }
+
+                       break;
+
+               case sgml_tag:
+                       switch (t) {
+                       case '[':
+                               state = compound_tag;
+                               obrace = 1;
+                               ebrace = 0;
+                               p ++;
+                               break;
+                       case '-':
+                               state = comment_tag;
+                               p ++;
+                               break;
+                       default:
+                               state = sgml_content;
+                               break;
+                       }
+
+                       break;
+
+               case xml_tag:
+                       if (t == '?') {
+                               state = xml_tag_end;
+                       }
+                       else if (t == '>') {
+                               /* Misformed xml tag */
+                               hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS;
+                               state = tag_end;
+                               continue;
+                       }
+                       /* We efficiently ignore xml tags */
+                       p ++;
+                       break;
+
+               case xml_tag_end:
+                       if (t == '>') {
+                               state = tag_end;
+                               continue;
+                       }
+                       else {
+                               hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS;
+                               p ++;
+                       }
+                       break;
+
+               case compound_tag:
+                       if (t == '[') {
+                               obrace ++;
+                       }
+                       else if (t == ']') {
+                               ebrace ++;
+                       }
+                       else if (t == '>' && obrace == ebrace) {
+                               state = tag_end;
+                               continue;
+                       }
+                       p ++;
+                       break;
+
+               case comment_tag:
+                       if (t != '-')  {
+                               hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS;
+                               state = tag_end;
+                       }
+                       else {
+                               p++;
+                               ebrace = 0;
+                               /*
+                                * https://www.w3.org/TR/2012/WD-html5-20120329/syntax.html#syntax-comments
+                                *  ... the text must not start with a single
+                                *  U+003E GREATER-THAN SIGN character (>),
+                                *  nor start with a "-" (U+002D) character followed by
+                                *  a U+003E GREATER-THAN SIGN (>) character,
+                                *  nor contain two consecutive U+002D HYPHEN-MINUS
+                                *  characters (--), nor end with a "-" (U+002D) character.
+                                */
+                               if (p[0] == '-' && p + 1 < end && p[1] == '>') {
+                                       hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS;
+                                       p ++;
+                                       state = tag_end;
+                               }
+                               else if (*p == '>') {
+                                       hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS;
+                                       state = tag_end;
+                               }
+                               else {
+                                       state = comment_content;
+                               }
+                       }
+                       break;
+
+               case comment_content:
+                       if (t == '-') {
+                               ebrace ++;
+                       }
+                       else if (t == '>' && ebrace >= 2) {
+                               state = tag_end;
+                               continue;
+                       }
+                       else {
+                               ebrace = 0;
+                       }
+
+                       p ++;
+                       break;
+
+               case content_ignore:
+                       if (t != '<') {
+                               p ++;
+                       }
+                       else {
+                               state = tag_begin;
+                       }
+                       break;
+
+               case content_write:
+
+                       if (t != '<') {
+                               if (t == '&') {
+                                       need_decode = TRUE;
+                               }
+                               else if (g_ascii_isspace (t)) {
+                                       save_space = TRUE;
+
+                                       if (p > c) {
+                                               if (need_decode) {
+                                                       goffset old_offset = dest->len;
+
+                                                       if (content_tag) {
+                                                               if (content_tag->content_length == 0) {
+                                                                       content_tag->content_offset = old_offset;
+                                                               }
+                                                       }
+
+                                                       g_byte_array_append (dest, c, (p - c));
+
+                                                       len = rspamd_html_decode_entitles_inplace (
+                                                                       dest->data + old_offset,
+                                                                       p - c);
+                                                       dest->len = dest->len + len - (p - c);
+
+                                                       if (content_tag) {
+                                                               content_tag->content_length += len;
+                                                       }
+                                               }
+                                               else {
+                                                       len = p - c;
+
+                                                       if (content_tag) {
+                                                               if (content_tag->content_length == 0) {
+                                                                       content_tag->content_offset = dest->len;
+                                                               }
+
+                                                               content_tag->content_length += len;
+                                                       }
+
+                                                       g_byte_array_append (dest, c, len);
+                                               }
+                                       }
+
+                                       c = p;
+                                       state = content_ignore_sp;
+                               }
+                               else {
+                                       if (save_space) {
+                                               /* Append one space if needed */
+                                               if (dest->len > 0 &&
+                                                               !g_ascii_isspace (dest->data[dest->len - 1])) {
+                                                       g_byte_array_append (dest, " ", 1);
+                                                       if (content_tag) {
+                                                               if (content_tag->content_length == 0) {
+                                                                       /*
+                                                                        * Special case
+                                                                        * we have a space at the beginning but
+                                                                        * we have no set content_offset
+                                                                        * so we need to do it here
+                                                                        */
+                                                                       content_tag->content_offset = dest->len;
+                                                               }
+                                                               else {
+                                                                       content_tag->content_length++;
+                                                               }
+                                                       }
+                                               }
+                                               save_space = FALSE;
+                                       }
+                               }
+                       }
+                       else {
+                               if (c != p) {
+
+                                       if (need_decode) {
+                                               goffset old_offset = dest->len;
+
+                                               if (content_tag) {
+                                                       if (content_tag->content_length == 0) {
+                                                               content_tag->content_offset = dest->len;
+                                                       }
+                                               }
+
+                                               g_byte_array_append (dest, c, (p - c));
+                                               len = rspamd_html_decode_entitles_inplace (
+                                                               dest->data + old_offset,
+                                                               p - c);
+                                               dest->len = dest->len + len - (p - c);
+
+                                               if (content_tag) {
+                                                       content_tag->content_length += len;
+                                               }
+                                       }
+                                       else {
+                                               len = p - c;
+
+                                               if (content_tag) {
+                                                       if (content_tag->content_length == 0) {
+                                                               content_tag->content_offset = dest->len;
+                                                       }
+
+                                                       content_tag->content_length += len;
+                                               }
+
+                                               g_byte_array_append (dest, c, len);
+                                       }
+                               }
+
+                               content_tag = NULL;
+
+                               state = tag_begin;
+                               continue;
+                       }
+
+                       p ++;
+                       break;
+
+               case content_style: {
+
+                       /*
+                        * We just search for the first </s substring and then pass
+                        * the content to the parser (if needed)
+                        */
+                       goffset end_style = rspamd_substring_search (p, end - p,
+                                       "</", 2);
+                       if (end_style == -1 || g_ascii_tolower (p[end_style + 2]) != 's') {
+                               /* Invalid style */
+                               state = content_ignore;
+                       }
+                       else {
+
+                               if (allow_css) {
+                                       GError *err = NULL;
+                                       hc->css_style = rspamd_css_parse_style (pool, p, end_style, hc->css_style,
+                                                       &err);
+
+                                       if (err) {
+                                               msg_info_pool ("cannot parse css: %e", err);
+                                               g_error_free (err);
+                                       }
+                               }
+
+                               p += end_style;
+                               state = tag_begin;
+                       }
+                       break;
+               }
+
+               case content_ignore_sp:
+                       if (!g_ascii_isspace (t)) {
+                               c = p;
+                               state = content_write;
+                               continue;
+                       }
+
+                       p ++;
+                       break;
+
+               case sgml_content:
+                       /* TODO: parse DOCTYPE here */
+                       if (t == '>') {
+                               state = tag_end;
+                               /* We don't know a lot about sgml tags, ignore them */
+                               cur_tag = NULL;
+                               continue;
+                       }
+                       p ++;
+                       break;
+
+               case tag_content:
+                       rspamd_html_parse_tag_content (pool, hc, cur_tag,
+                                       p, &substate, &savep);
+                       if (t == '>') {
+                               if (closing) {
+                                       cur_tag->flags |= FL_CLOSING;
+
+                                       if (cur_tag->flags & FL_CLOSED) {
+                                               /* Bad mix of closed and closing */
+                                               hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS;
+                                       }
+
+                                       closing = FALSE;
+                               }
+
+                               state = tag_end;
+                               continue;
+                       }
+                       p ++;
+                       break;
+
+               case tag_end:
+                       substate = 0;
+                       savep = NULL;
+
+                       if (cur_tag != NULL) {
+                               balanced = TRUE;
+
+                               if (rspamd_html_process_tag (pool, hc, cur_tag, &cur_level,
+                                               &balanced)) {
+                                       state = content_write;
+                                       need_decode = FALSE;
+                               }
+                               else {
+                                       if (cur_tag->id == Tag_STYLE) {
+                                               state = content_style;
+                                       }
+                                       else {
+                                               state = content_ignore;
+                                       }
+                               }
+
+                               if (cur_tag->id != -1 && cur_tag->id < N_TAGS) {
+                                       if (cur_tag->flags & CM_UNIQUE) {
+                                               if (isset (hc->tags_seen, cur_tag->id)) {
+                                                       /* Duplicate tag has been found */
+                                                       hc->flags |= RSPAMD_HTML_FLAG_DUPLICATE_ELEMENTS;
+                                               }
+                                       }
+                                       setbit (hc->tags_seen, cur_tag->id);
+                               }
+
+                               if (!(cur_tag->flags & (FL_CLOSED|FL_CLOSING))) {
+                                       content_tag = cur_tag;
+                               }
+
+                               /* Handle newlines */
+                               if (cur_tag->id == Tag_BR || cur_tag->id == Tag_HR) {
+                                       if (dest->len > 0 && dest->data[dest->len - 1] != '\n') {
+                                               g_byte_array_append (dest, "\r\n", 2);
+
+                                               if (content_tag) {
+                                                       if (content_tag->content_length == 0) {
+                                                               /*
+                                                                * Special case
+                                                                * we have a \r\n at the beginning but
+                                                                * we have no set content_offset
+                                                                * so we need to do it here
+                                                                */
+                                                               content_tag->content_offset = dest->len;
+                                                       }
+                                                       else {
+                                                               content_tag->content_length += 2;
+                                                       }
+                                               }
+                                       }
+                                       save_space = FALSE;
+                               }
+
+                               if ((cur_tag->id == Tag_P ||
+                                               cur_tag->id == Tag_TR ||
+                                               cur_tag->id == Tag_DIV)) {
+                                       if (dest->len > 0 && dest->data[dest->len - 1] != '\n') {
+                                               g_byte_array_append (dest, "\r\n", 2);
+
+                                               if (content_tag) {
+                                                       if (content_tag->content_length == 0) {
+                                                               /*
+                                                                * Special case
+                                                                * we have a \r\n at the beginning but
+                                                                * we have no set content_offset
+                                                                * so we need to get it here
+                                                                */
+                                                               content_tag->content_offset = dest->len;
+                                                       }
+                                                       else {
+                                                               content_tag->content_length += 2;
+                                                       }
+                                               }
+                                       }
+                                       save_space = FALSE;
+                               }
+
+                               /* XXX: uncomment when styles parsing is not so broken */
+                               if (cur_tag->flags & FL_HREF /* && !(cur_tag->flags & FL_IGNORE) */) {
+                                       if (!(cur_tag->flags & (FL_CLOSING))) {
+                                               url = rspamd_html_process_url_tag (pool, cur_tag, hc);
+
+                                               if (url != NULL) {
+
+                                                       if (url_set != NULL) {
+                                                               struct rspamd_url *maybe_existing =
+                                                                               rspamd_url_set_add_or_return (url_set, url);
+                                                               if (maybe_existing == url) {
+                                                                       rspamd_process_html_url (pool, url, url_set,
+                                                                                       part_urls);
+                                                               }
+                                                               else {
+                                                                       url = maybe_existing;
+                                                                       /* Increase count to avoid odd checks failure */
+                                                                       url->count ++;
+                                                               }
+                                                       }
+
+                                                       href_offset = dest->len;
+                                               }
+                                       }
+
+                                       if (cur_tag->id == Tag_A) {
+                                               if (!balanced && cur_level && cur_level->prev) {
+                                                       struct html_tag *prev_tag;
+                                                       struct rspamd_url *prev_url;
+
+                                                       prev_tag = cur_level->prev->data;
+
+                                                       if (prev_tag->id == Tag_A &&
+                                                                       !(prev_tag->flags & (FL_CLOSING)) &&
+                                                                       prev_tag->extra) {
+                                                               prev_url = prev_tag->extra;
+
+                                                               rspamd_html_check_displayed_url (pool,
+                                                                               exceptions, url_set,
+                                                                               dest, href_offset,
+                                                                               prev_url);
+                                                       }
+                                               }
+
+                                               if (cur_tag->flags & (FL_CLOSING)) {
+
+                                                       /* Insert exception */
+                                                       if (url != NULL && (gint) dest->len > href_offset) {
+                                                               rspamd_html_check_displayed_url (pool,
+                                                                               exceptions, url_set,
+                                                                               dest, href_offset,
+                                                                               url);
+
+                                                       }
+
+                                                       href_offset = -1;
+                                                       url = NULL;
+                                               }
+                                       }
+                               }
+                               else if (cur_tag->id == Tag_BASE && !(cur_tag->flags & (FL_CLOSING))) {
+                                       /*
+                                        * Base is allowed only within head tag but HTML is retarded
+                                        */
+                                       if (hc->base_url == NULL) {
+                                               url = rspamd_html_process_url_tag (pool, cur_tag, hc);
+
+                                               if (url != NULL) {
+                                                       msg_debug_html ("got valid base tag");
+                                                       hc->base_url = url;
+                                                       cur_tag->extra = url;
+                                                       cur_tag->flags |= FL_HREF;
+                                               }
+                                               else {
+                                                       msg_debug_html ("got invalid base tag!");
+                                               }
+                                       }
+                               }
+
+                               if (cur_tag->id == Tag_IMG && !(cur_tag->flags & FL_CLOSING)) {
+                                       rspamd_html_process_img_tag (pool, cur_tag, hc, url_set,
+                                                       part_urls, dest);
+                               }
+                               else if (cur_tag->id == Tag_LINK && !(cur_tag->flags & FL_CLOSING)) {
+                                       rspamd_html_process_link_tag (pool, cur_tag, hc, url_set,
+                                                       part_urls);
+                               }
+                               else if (cur_tag->flags & FL_BLOCK) {
+                                       struct html_block *bl;
+
+                                       if (cur_tag->flags & FL_CLOSING) {
+                                               /* Just remove block element from the queue if any */
+                                               if (styles_blocks->length > 0) {
+                                                       g_queue_pop_tail (styles_blocks);
+                                               }
+                                       }
+                                       else {
+                                               rspamd_html_process_block_tag (pool, cur_tag, hc);
+                                               bl = cur_tag->extra;
+
+                                               if (bl) {
+                                                       rspamd_html_propagate_style (hc, cur_tag,
+                                                                       cur_tag->extra, styles_blocks);
+
+                                                       /* Check visibility */
+                                                       if (bl->font_size < 3 ||
+                                                               bl->font_color.d.comp.alpha < 10) {
+
+                                                               bl->visible = FALSE;
+                                                               msg_debug_html ("tag is not visible: font size: "
+                                                                                               "%d, alpha: %d",
+                                                                               (int)bl->font_size,
+                                                                               (int)bl->font_color.d.comp.alpha);
+                                                       }
+
+                                                       if (!bl->visible) {
+                                                               state = content_ignore;
+                                                       }
+                                               }
+                                       }
+                               }
+                       }
+                       else {
+                               state = content_write;
+                       }
+
+
+                       p++;
+                       c = p;
+                       cur_tag = NULL;
+                       break;
+               }
+       }
+
+       if (hc->html_tags) {
+               g_node_traverse (hc->html_tags, G_POST_ORDER, G_TRAVERSE_ALL, -1,
+                               rspamd_html_propagate_lengths, NULL);
+       }
+
+       g_queue_free (styles_blocks);
+       hc->parsed = dest;
+
+       return dest;
+}
+
+GByteArray*
+rspamd_html_process_part (rspamd_mempool_t *pool,
+               struct html_content *hc,
+               GByteArray *in)
+{
+       /*
+        * Convenience entry point: run the full parser on `in`, passing NULL
+        * for its three optional pointer arguments and FALSE for its
+        * trailing flag, and hand back whatever it returns.
+        */
+       GByteArray *parsed = rspamd_html_process_part_full (pool, hc, in,
+                       NULL, NULL, NULL, FALSE);
+
+       return parsed;
+}
+
+guint
+rspamd_html_decode_entitles_inplace (gchar *s, gsize len)
+{
+       /*
+        * Global-scope forwarder to the decoder in the rspamd::html namespace.
+        * The result is passed through unchanged (converted to guint on return);
+        * presumably it is the length of the decoded text, confirm against
+        * html_entities.hxx.
+        */
+       const auto decoded = rspamd::html::decode_html_entitles_inplace(s, len);
+
+       return decoded;
+}
diff --git a/src/libserver/html/html_entities.cxx b/src/libserver/html/html_entities.cxx
new file mode 100644 (file)
index 0000000..9be8c67
--- /dev/null
@@ -0,0 +1,2397 @@
+/*-
+ * Copyright 2021 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "config.h"
+#include "html_entities.hxx"
+
+#include <string>
+#include <contrib/robin-hood/robin_hood.h>
+#include <unicode/utf8.h>
+#include "libutil/cxx/util.hxx"
+
+namespace rspamd::html {
+
+struct html_entity_def { /* one row of the named-entity table; built by aggregate initialisation through ENTITY_DEF */
+       std::string name; /* entity name as written in the table, without '&' or ';' (e.g. "szlig") */
+       std::string replacement; /* bytes to substitute; the table entries are the UTF-8 encoding (e.g. "\xc3\x9f" for 223) */
+       unsigned code; /* numeric code listed next to the name (e.g. 223 for "szlig") */
+};
+
+#define ENTITY_DEF(name, code, replacement) html_entity_def{(name), (replacement), (code)} /* argument order is (name, code, replacement) but members are (name, replacement, code): the macro swaps the last two */
+
+static const auto html_entities_array = rspamd::array_of<html_entity_def>(
+               ENTITY_DEF("szlig", 223, "\xc3\x9f"),
+               ENTITY_DEF("prime", 8242, "\xe2\x80\xb2"),
+               ENTITY_DEF("lnsim", 8934, "\xe2\x8b\xa6"),
+               ENTITY_DEF("nvDash", 8877, "\xe2\x8a\xad"),
+               ENTITY_DEF("isinsv", 8947, "\xe2\x8b\xb3"),
+               ENTITY_DEF("notin", 8713, "\xe2\x88\x89"),
+               ENTITY_DEF("becaus", 8757, "\xe2\x88\xb5"),
+               ENTITY_DEF("Leftrightarrow", 8660, "\xe2\x87\x94"),
+               ENTITY_DEF("EmptySmallSquare", 9723, "\xe2\x97\xbb"),
+               ENTITY_DEF("SquareUnion", 8852, "\xe2\x8a\x94"),
+               ENTITY_DEF("subdot", 10941, "\xe2\xaa\xbd"),
+               ENTITY_DEF("Dstrok", 272, "\xc4\x90"),
+               ENTITY_DEF("rrarr", 8649, "\xe2\x87\x89"),
+               ENTITY_DEF("rArr", 8658, "\xe2\x87\x92"),
+               ENTITY_DEF("Aacute", 193, "\xc3\x81"),
+               ENTITY_DEF("kappa", 954, "\xce\xba"),
+               ENTITY_DEF("Iopf", 120128, "\xf0\x9d\x95\x80"),
+               ENTITY_DEF("hyphen", 8208, "\xe2\x80\x90"),
+               ENTITY_DEF("rarrbfs", 10528, "\xe2\xa4\xa0"),
+               ENTITY_DEF("supsetneqq", 10956, "\xe2\xab\x8c"),
+               ENTITY_DEF("gacute", 501, "\xc7\xb5"),
+               ENTITY_DEF("VeryThinSpace", 8202, "\xe2\x80\x8a"),
+               ENTITY_DEF("tint", 8749, "\xe2\x88\xad"),
+               ENTITY_DEF("ffr", 120099, "\xf0\x9d\x94\xa3"),
+               ENTITY_DEF("kgreen", 312, "\xc4\xb8"),
+               ENTITY_DEF("nis", 8956, "\xe2\x8b\xbc"),
+               ENTITY_DEF("NotRightTriangleBar", 10704, "\xe2\xa7\x90\xcc\xb8"),
+               ENTITY_DEF("Eogon", 280, "\xc4\x98"),
+               ENTITY_DEF("lbrke", 10635, "\xe2\xa6\x8b"),
+               ENTITY_DEF("phi", 966, "\xcf\x86"),
+               ENTITY_DEF("notnivc", 8957, "\xe2\x8b\xbd"),
+               ENTITY_DEF("utilde", 361, "\xc5\xa9"),
+               ENTITY_DEF("Fopf", 120125, "\xf0\x9d\x94\xbd"),
+               ENTITY_DEF("Vcy", 1042, "\xd0\x92"),
+               ENTITY_DEF("erDot", 8787, "\xe2\x89\x93"),
+               ENTITY_DEF("nsubE", 10949, "\xe2\xab\x85\xcc\xb8"),
+               ENTITY_DEF("egrave", 232, "\xc3\xa8"),
+               ENTITY_DEF("Lcedil", 315, "\xc4\xbb"),
+               ENTITY_DEF("lharul", 10602, "\xe2\xa5\xaa"),
+               ENTITY_DEF("middot", 183, "\xc2\xb7"),
+               ENTITY_DEF("ggg", 8921, "\xe2\x8b\x99"),
+               ENTITY_DEF("NestedLessLess", 8810, "\xe2\x89\xaa"),
+               ENTITY_DEF("tau", 964, "\xcf\x84"),
+               ENTITY_DEF("setmn", 8726, "\xe2\x88\x96"),
+               ENTITY_DEF("frac78", 8542, "\xe2\x85\x9e"),
+               ENTITY_DEF("para", 182, "\xc2\xb6"),
+               ENTITY_DEF("Rcedil", 342, "\xc5\x96"),
+               ENTITY_DEF("propto", 8733, "\xe2\x88\x9d"),
+               ENTITY_DEF("sqsubset", 8847, "\xe2\x8a\x8f"),
+               ENTITY_DEF("ensp", 8194, "\xe2\x80\x82"),
+               ENTITY_DEF("boxvH", 9578, "\xe2\x95\xaa"),
+               ENTITY_DEF("NotGreaterTilde", 8821, "\xe2\x89\xb5"),
+               ENTITY_DEF("ffllig", 64260, "\xef\xac\x84"),
+               ENTITY_DEF("kcedil", 311, "\xc4\xb7"),
+               ENTITY_DEF("omega", 969, "\xcf\x89"),
+               ENTITY_DEF("sime", 8771, "\xe2\x89\x83"),
+               ENTITY_DEF("LeftTriangleEqual", 8884, "\xe2\x8a\xb4"),
+               ENTITY_DEF("bsemi", 8271, "\xe2\x81\x8f"),
+               ENTITY_DEF("rdquor", 8221, "\xe2\x80\x9d"),
+               ENTITY_DEF("Utilde", 360, "\xc5\xa8"),
+               ENTITY_DEF("bsol", 92, "\x5c"),
+               ENTITY_DEF("risingdotseq", 8787, "\xe2\x89\x93"),
+               ENTITY_DEF("ultri", 9720, "\xe2\x97\xb8"),
+               ENTITY_DEF("rhov", 1009, "\xcf\xb1"),
+               ENTITY_DEF("TildeEqual", 8771, "\xe2\x89\x83"),
+               ENTITY_DEF("jukcy", 1108, "\xd1\x94"),
+               ENTITY_DEF("perp", 8869, "\xe2\x8a\xa5"),
+               ENTITY_DEF("capbrcup", 10825, "\xe2\xa9\x89"),
+               ENTITY_DEF("ltrie", 8884, "\xe2\x8a\xb4"),
+               ENTITY_DEF("LessTilde", 8818, "\xe2\x89\xb2"),
+               ENTITY_DEF("popf", 120161, "\xf0\x9d\x95\xa1"),
+               ENTITY_DEF("dbkarow", 10511, "\xe2\xa4\x8f"),
+               ENTITY_DEF("roang", 10221, "\xe2\x9f\xad"),
+               ENTITY_DEF("brvbar", 166, "\xc2\xa6"),
+               ENTITY_DEF("CenterDot", 183, "\xc2\xb7"),
+               ENTITY_DEF("notindot", 8949, "\xe2\x8b\xb5\xcc\xb8"),
+               ENTITY_DEF("supmult", 10946, "\xe2\xab\x82"),
+               ENTITY_DEF("multimap", 8888, "\xe2\x8a\xb8"),
+               ENTITY_DEF("frac34", 190, "\xc2\xbe"),
+               ENTITY_DEF("mapsto", 8614, "\xe2\x86\xa6"),
+               ENTITY_DEF("flat", 9837, "\xe2\x99\xad"),
+               ENTITY_DEF("updownarrow", 8597, "\xe2\x86\x95"),
+               ENTITY_DEF("gne", 10888, "\xe2\xaa\x88"),
+               ENTITY_DEF("nrarrc", 10547, "\xe2\xa4\xb3\xcc\xb8"),
+               ENTITY_DEF("suphsol", 10185, "\xe2\x9f\x89"),
+               ENTITY_DEF("nGtv", 8811, "\xe2\x89\xab\xcc\xb8"),
+               ENTITY_DEF("hopf", 120153, "\xf0\x9d\x95\x99"),
+               ENTITY_DEF("pointint", 10773, "\xe2\xa8\x95"),
+               ENTITY_DEF("glj", 10916, "\xe2\xaa\xa4"),
+               ENTITY_DEF("LeftDoubleBracket", 10214, "\xe2\x9f\xa6"),
+               ENTITY_DEF("NotSupersetEqual", 8841, "\xe2\x8a\x89"),
+               ENTITY_DEF("dot", 729, "\xcb\x99"),
+               ENTITY_DEF("tbrk", 9140, "\xe2\x8e\xb4"),
+               ENTITY_DEF("LeftUpDownVector", 10577, "\xe2\xa5\x91"),
+               ENTITY_DEF("uml", 168, "\xc2\xa8"),
+               ENTITY_DEF("bbrk", 9141, "\xe2\x8e\xb5"),
+               ENTITY_DEF("nearrow", 8599, "\xe2\x86\x97"),
+               ENTITY_DEF("backsimeq", 8909, "\xe2\x8b\x8d"),
+               ENTITY_DEF("dblac", 733, "\xcb\x9d"),
+               ENTITY_DEF("circleddash", 8861, "\xe2\x8a\x9d"),
+               ENTITY_DEF("ldsh", 8626, "\xe2\x86\xb2"),
+               ENTITY_DEF("sce", 10928, "\xe2\xaa\xb0"),
+               ENTITY_DEF("angst", 197, "\xc3\x85"),
+               ENTITY_DEF("yen", 165, "\xc2\xa5"),
+               ENTITY_DEF("nsupE", 10950, "\xe2\xab\x86\xcc\xb8"),
+               ENTITY_DEF("Uscr", 119984, "\xf0\x9d\x92\xb0"),
+               ENTITY_DEF("subplus", 10943, "\xe2\xaa\xbf"),
+               ENTITY_DEF("nleqq", 8806, "\xe2\x89\xa6\xcc\xb8"),
+               ENTITY_DEF("nprcue", 8928, "\xe2\x8b\xa0"),
+               ENTITY_DEF("Ocirc", 212, "\xc3\x94"),
+               ENTITY_DEF("disin", 8946, "\xe2\x8b\xb2"),
+               ENTITY_DEF("EqualTilde", 8770, "\xe2\x89\x82"),
+               ENTITY_DEF("YUcy", 1070, "\xd0\xae"),
+               ENTITY_DEF("Kscr", 119974, "\xf0\x9d\x92\xa6"),
+               ENTITY_DEF("lg", 8822, "\xe2\x89\xb6"),
+               ENTITY_DEF("nLeftrightarrow", 8654, "\xe2\x87\x8e"),
+               ENTITY_DEF("eplus", 10865, "\xe2\xa9\xb1"),
+               ENTITY_DEF("les", 10877, "\xe2\xa9\xbd"),
+               ENTITY_DEF("sfr", 120112, "\xf0\x9d\x94\xb0"),
+               ENTITY_DEF("HumpDownHump", 8782, "\xe2\x89\x8e"),
+               ENTITY_DEF("Fouriertrf", 8497, "\xe2\x84\xb1"),
+               ENTITY_DEF("Updownarrow", 8661, "\xe2\x87\x95"),
+               ENTITY_DEF("nrarr", 8603, "\xe2\x86\x9b"),
+               ENTITY_DEF("radic", 8730, "\xe2\x88\x9a"),
+               ENTITY_DEF("gnap", 10890, "\xe2\xaa\x8a"),
+               ENTITY_DEF("zeta", 950, "\xce\xb6"),
+               ENTITY_DEF("Qscr", 119980, "\xf0\x9d\x92\xac"),
+               ENTITY_DEF("NotRightTriangleEqual", 8941, "\xe2\x8b\xad"),
+               ENTITY_DEF("nshortmid", 8740, "\xe2\x88\xa4"),
+               ENTITY_DEF("SHCHcy", 1065, "\xd0\xa9"),
+               ENTITY_DEF("piv", 982, "\xcf\x96"),
+               ENTITY_DEF("angmsdaa", 10664, "\xe2\xa6\xa8"),
+               ENTITY_DEF("curlywedge", 8911, "\xe2\x8b\x8f"),
+               ENTITY_DEF("sqcaps", 8851, "\xe2\x8a\x93\xef\xb8\x80"),
+               ENTITY_DEF("sum", 8721, "\xe2\x88\x91"),
+               ENTITY_DEF("rarrtl", 8611, "\xe2\x86\xa3"),
+               ENTITY_DEF("gescc", 10921, "\xe2\xaa\xa9"),
+               ENTITY_DEF("sup", 8835, "\xe2\x8a\x83"),
+               ENTITY_DEF("smid", 8739, "\xe2\x88\xa3"),
+               ENTITY_DEF("cularr", 8630, "\xe2\x86\xb6"),
+               ENTITY_DEF("olcross", 10683, "\xe2\xa6\xbb"),
+               ENTITY_DEF("GT", 62, "\x3e"),
+               ENTITY_DEF("scap", 10936, "\xe2\xaa\xb8"),
+               ENTITY_DEF("capcup", 10823, "\xe2\xa9\x87"),
+               ENTITY_DEF("NotSquareSubsetEqual", 8930, "\xe2\x8b\xa2"),
+               ENTITY_DEF("uhblk", 9600, "\xe2\x96\x80"),
+               ENTITY_DEF("latail", 10521, "\xe2\xa4\x99"),
+               ENTITY_DEF("smtes", 10924, "\xe2\xaa\xac\xef\xb8\x80"),
+               ENTITY_DEF("RoundImplies", 10608, "\xe2\xa5\xb0"),
+               ENTITY_DEF("wreath", 8768, "\xe2\x89\x80"),
+               ENTITY_DEF("curlyvee", 8910, "\xe2\x8b\x8e"),
+               ENTITY_DEF("uscr", 120010, "\xf0\x9d\x93\x8a"),
+               ENTITY_DEF("nleftrightarrow", 8622, "\xe2\x86\xae"),
+               ENTITY_DEF("ucy", 1091, "\xd1\x83"),
+               ENTITY_DEF("nvge", 8805, "\xe2\x89\xa5\xe2\x83\x92"),
+               ENTITY_DEF("bnot", 8976, "\xe2\x8c\x90"),
+               ENTITY_DEF("alefsym", 8501, "\xe2\x84\xb5"),
+               ENTITY_DEF("star", 9734, "\xe2\x98\x86"),
+               ENTITY_DEF("boxHd", 9572, "\xe2\x95\xa4"),
+               ENTITY_DEF("vsubnE", 10955, "\xe2\xab\x8b\xef\xb8\x80"),
+               ENTITY_DEF("Popf", 8473, "\xe2\x84\x99"),
+               ENTITY_DEF("simgE", 10912, "\xe2\xaa\xa0"),
+               ENTITY_DEF("upsilon", 965, "\xcf\x85"),
+               ENTITY_DEF("NoBreak", 8288, "\xe2\x81\xa0"),
+               ENTITY_DEF("realine", 8475, "\xe2\x84\x9b"),
+               ENTITY_DEF("frac38", 8540, "\xe2\x85\x9c"),
+               ENTITY_DEF("YAcy", 1071, "\xd0\xaf"),
+               ENTITY_DEF("bnequiv", 8801, "\xe2\x89\xa1\xe2\x83\xa5"),
+               ENTITY_DEF("cudarrr", 10549, "\xe2\xa4\xb5"),
+               ENTITY_DEF("lsime", 10893, "\xe2\xaa\x8d"),
+               ENTITY_DEF("lowbar", 95, "\x5f"),
+               ENTITY_DEF("utdot", 8944, "\xe2\x8b\xb0"),
+               ENTITY_DEF("ReverseElement", 8715, "\xe2\x88\x8b"),
+               ENTITY_DEF("nshortparallel", 8742, "\xe2\x88\xa6"),
+               ENTITY_DEF("DJcy", 1026, "\xd0\x82"),
+               ENTITY_DEF("nsube", 8840, "\xe2\x8a\x88"),
+               ENTITY_DEF("VDash", 8875, "\xe2\x8a\xab"),
+               ENTITY_DEF("Ncaron", 327, "\xc5\x87"),
+               ENTITY_DEF("LeftUpVector", 8639, "\xe2\x86\xbf"),
+               ENTITY_DEF("Kcy", 1050, "\xd0\x9a"),
+               ENTITY_DEF("NotLeftTriangleEqual", 8940, "\xe2\x8b\xac"),
+               ENTITY_DEF("nvHarr", 10500, "\xe2\xa4\x84"),
+               ENTITY_DEF("lotimes", 10804, "\xe2\xa8\xb4"),
+               ENTITY_DEF("RightFloor", 8971, "\xe2\x8c\x8b"),
+               ENTITY_DEF("succ", 8827, "\xe2\x89\xbb"),
+               ENTITY_DEF("Ucy", 1059, "\xd0\xa3"),
+               ENTITY_DEF("darr", 8595, "\xe2\x86\x93"),
+               ENTITY_DEF("lbarr", 10508, "\xe2\xa4\x8c"),
+               ENTITY_DEF("xfr", 120117, "\xf0\x9d\x94\xb5"),
+               ENTITY_DEF("zopf", 120171, "\xf0\x9d\x95\xab"),
+               ENTITY_DEF("Phi", 934, "\xce\xa6"),
+               ENTITY_DEF("ord", 10845, "\xe2\xa9\x9d"),
+               ENTITY_DEF("iinfin", 10716, "\xe2\xa7\x9c"),
+               ENTITY_DEF("Xfr", 120091, "\xf0\x9d\x94\x9b"),
+               ENTITY_DEF("qint", 10764, "\xe2\xa8\x8c"),
+               ENTITY_DEF("Upsilon", 933, "\xce\xa5"),
+               ENTITY_DEF("NotSubset", 8834, "\xe2\x8a\x82\xe2\x83\x92"),
+               ENTITY_DEF("gfr", 120100, "\xf0\x9d\x94\xa4"),
+               ENTITY_DEF("notnivb", 8958, "\xe2\x8b\xbe"),
+               ENTITY_DEF("Afr", 120068, "\xf0\x9d\x94\x84"),
+               ENTITY_DEF("ge", 8805, "\xe2\x89\xa5"),
+               ENTITY_DEF("iexcl", 161, "\xc2\xa1"),
+               ENTITY_DEF("dfr", 120097, "\xf0\x9d\x94\xa1"),
+               ENTITY_DEF("rsaquo", 8250, "\xe2\x80\xba"),
+               ENTITY_DEF("xcap", 8898, "\xe2\x8b\x82"),
+               ENTITY_DEF("Jopf", 120129, "\xf0\x9d\x95\x81"),
+               ENTITY_DEF("Hstrok", 294, "\xc4\xa6"),
+               ENTITY_DEF("ldca", 10550, "\xe2\xa4\xb6"),
+               ENTITY_DEF("lmoust", 9136, "\xe2\x8e\xb0"),
+               ENTITY_DEF("wcirc", 373, "\xc5\xb5"),
+               ENTITY_DEF("DownRightVector", 8641, "\xe2\x87\x81"),
+               ENTITY_DEF("LessFullEqual", 8806, "\xe2\x89\xa6"),
+               ENTITY_DEF("dotsquare", 8865, "\xe2\x8a\xa1"),
+               ENTITY_DEF("zhcy", 1078, "\xd0\xb6"),
+               ENTITY_DEF("mDDot", 8762, "\xe2\x88\xba"),
+               ENTITY_DEF("Prime", 8243, "\xe2\x80\xb3"),
+               ENTITY_DEF("prec", 8826, "\xe2\x89\xba"),
+               ENTITY_DEF("swnwar", 10538, "\xe2\xa4\xaa"),
+               ENTITY_DEF("COPY", 169, "\xc2\xa9"),
+               ENTITY_DEF("cong", 8773, "\xe2\x89\x85"),
+               ENTITY_DEF("sacute", 347, "\xc5\x9b"),
+               ENTITY_DEF("Nopf", 8469, "\xe2\x84\x95"),
+               ENTITY_DEF("it", 8290, "\xe2\x81\xa2"),
+               ENTITY_DEF("SOFTcy", 1068, "\xd0\xac"),
+               ENTITY_DEF("uuarr", 8648, "\xe2\x87\x88"),
+               ENTITY_DEF("iota", 953, "\xce\xb9"),
+               ENTITY_DEF("notinE", 8953, "\xe2\x8b\xb9\xcc\xb8"),
+               ENTITY_DEF("jfr", 120103, "\xf0\x9d\x94\xa7"),
+               ENTITY_DEF("QUOT", 34, "\x22"),
+               ENTITY_DEF("vsupnE", 10956, "\xe2\xab\x8c\xef\xb8\x80"),
+               ENTITY_DEF("igrave", 236, "\xc3\xac"),
+               ENTITY_DEF("bsim", 8765, "\xe2\x88\xbd"),
+               ENTITY_DEF("npreceq", 10927, "\xe2\xaa\xaf\xcc\xb8"),
+               ENTITY_DEF("zcaron", 382, "\xc5\xbe"),
+               ENTITY_DEF("DD", 8517, "\xe2\x85\x85"),
+               ENTITY_DEF("gamma", 947, "\xce\xb3"),
+               ENTITY_DEF("homtht", 8763, "\xe2\x88\xbb"),
+               ENTITY_DEF("NonBreakingSpace", 160, "\xc2\xa0"),
+               ENTITY_DEF("Proportion", 8759, "\xe2\x88\xb7"),
+               ENTITY_DEF("nedot", 8784, "\xe2\x89\x90\xcc\xb8"),
+               ENTITY_DEF("nabla", 8711, "\xe2\x88\x87"),
+               ENTITY_DEF("ac", 8766, "\xe2\x88\xbe"),
+               ENTITY_DEF("nsupe", 8841, "\xe2\x8a\x89"),
+               ENTITY_DEF("ell", 8467, "\xe2\x84\x93"),
+               ENTITY_DEF("boxvR", 9566, "\xe2\x95\x9e"),
+               ENTITY_DEF("LowerRightArrow", 8600, "\xe2\x86\x98"),
+               ENTITY_DEF("boxHu", 9575, "\xe2\x95\xa7"),
+               ENTITY_DEF("lE", 8806, "\xe2\x89\xa6"),
+               ENTITY_DEF("dzigrarr", 10239, "\xe2\x9f\xbf"),
+               ENTITY_DEF("rfloor", 8971, "\xe2\x8c\x8b"),
+               ENTITY_DEF("gneq", 10888, "\xe2\xaa\x88"),
+               ENTITY_DEF("rightleftharpoons", 8652, "\xe2\x87\x8c"),
+               ENTITY_DEF("gtquest", 10876, "\xe2\xa9\xbc"),
+               ENTITY_DEF("searhk", 10533, "\xe2\xa4\xa5"),
+               ENTITY_DEF("gesdoto", 10882, "\xe2\xaa\x82"),
+               ENTITY_DEF("cross", 10007, "\xe2\x9c\x97"),
+               ENTITY_DEF("rdquo", 8221, "\xe2\x80\x9d"),
+               ENTITY_DEF("sqsupset", 8848, "\xe2\x8a\x90"),
+               ENTITY_DEF("divonx", 8903, "\xe2\x8b\x87"),
+               ENTITY_DEF("lat", 10923, "\xe2\xaa\xab"),
+               ENTITY_DEF("rmoustache", 9137, "\xe2\x8e\xb1"),
+               ENTITY_DEF("succapprox", 10936, "\xe2\xaa\xb8"),
+               ENTITY_DEF("nhpar", 10994, "\xe2\xab\xb2"),
+               ENTITY_DEF("sharp", 9839, "\xe2\x99\xaf"),
+               ENTITY_DEF("lrcorner", 8991, "\xe2\x8c\x9f"),
+               ENTITY_DEF("Vscr", 119985, "\xf0\x9d\x92\xb1"),
+               ENTITY_DEF("varsigma", 962, "\xcf\x82"),
+               ENTITY_DEF("bsolb", 10693, "\xe2\xa7\x85"),
+               ENTITY_DEF("cupcap", 10822, "\xe2\xa9\x86"),
+               ENTITY_DEF("leftrightarrow", 8596, "\xe2\x86\x94"),
+               ENTITY_DEF("LeftTee", 8867, "\xe2\x8a\xa3"),
+               ENTITY_DEF("Sqrt", 8730, "\xe2\x88\x9a"),
+               ENTITY_DEF("Odblac", 336, "\xc5\x90"),
+               ENTITY_DEF("ocir", 8858, "\xe2\x8a\x9a"),
+               ENTITY_DEF("eqslantless", 10901, "\xe2\xaa\x95"),
+               ENTITY_DEF("supedot", 10948, "\xe2\xab\x84"),
+               ENTITY_DEF("intercal", 8890, "\xe2\x8a\xba"),
+               ENTITY_DEF("Gbreve", 286, "\xc4\x9e"),
+               ENTITY_DEF("xrArr", 10233, "\xe2\x9f\xb9"),
+               ENTITY_DEF("NotTildeEqual", 8772, "\xe2\x89\x84"),
+               ENTITY_DEF("Bfr", 120069, "\xf0\x9d\x94\x85"),
+               ENTITY_DEF("Iuml", 207, "\xc3\x8f"),
+               ENTITY_DEF("leg", 8922, "\xe2\x8b\x9a"),
+               ENTITY_DEF("boxhU", 9576, "\xe2\x95\xa8"),
+               ENTITY_DEF("Gopf", 120126, "\xf0\x9d\x94\xbe"),
+               ENTITY_DEF("af", 8289, "\xe2\x81\xa1"),
+               ENTITY_DEF("xwedge", 8896, "\xe2\x8b\x80"),
+               ENTITY_DEF("precapprox", 10935, "\xe2\xaa\xb7"),
+               ENTITY_DEF("lcedil", 316, "\xc4\xbc"),
+               ENTITY_DEF("between", 8812, "\xe2\x89\xac"),
+               ENTITY_DEF("Oslash", 216, "\xc3\x98"),
+               ENTITY_DEF("breve", 728, "\xcb\x98"),
+               ENTITY_DEF("caps", 8745, "\xe2\x88\xa9\xef\xb8\x80"),
+               ENTITY_DEF("vangrt", 10652, "\xe2\xa6\x9c"),
+               ENTITY_DEF("lagran", 8466, "\xe2\x84\x92"),
+               ENTITY_DEF("kopf", 120156, "\xf0\x9d\x95\x9c"),
+               ENTITY_DEF("ReverseUpEquilibrium", 10607, "\xe2\xa5\xaf"),
+               ENTITY_DEF("nlsim", 8820, "\xe2\x89\xb4"),
+               ENTITY_DEF("Cap", 8914, "\xe2\x8b\x92"),
+               ENTITY_DEF("angmsdac", 10666, "\xe2\xa6\xaa"),
+               ENTITY_DEF("iocy", 1105, "\xd1\x91"),
+               ENTITY_DEF("seswar", 10537, "\xe2\xa4\xa9"),
+               ENTITY_DEF("dzcy", 1119, "\xd1\x9f"),
+               ENTITY_DEF("nsubset", 8834, "\xe2\x8a\x82\xe2\x83\x92"),
+               ENTITY_DEF("cup", 8746, "\xe2\x88\xaa"),
+               ENTITY_DEF("npar", 8742, "\xe2\x88\xa6"),
+               ENTITY_DEF("late", 10925, "\xe2\xaa\xad"),
+               ENTITY_DEF("plussim", 10790, "\xe2\xa8\xa6"),
+               ENTITY_DEF("Darr", 8609, "\xe2\x86\xa1"),
+               ENTITY_DEF("nexist", 8708, "\xe2\x88\x84"),
+               ENTITY_DEF("cent", 162, "\xc2\xa2"),
+               ENTITY_DEF("khcy", 1093, "\xd1\x85"),
+               ENTITY_DEF("smallsetminus", 8726, "\xe2\x88\x96"),
+               ENTITY_DEF("ycirc", 375, "\xc5\xb7"),
+               ENTITY_DEF("lharu", 8636, "\xe2\x86\xbc"),
+               ENTITY_DEF("upuparrows", 8648, "\xe2\x87\x88"),
+               ENTITY_DEF("sigmaf", 962, "\xcf\x82"),
+               ENTITY_DEF("nltri", 8938, "\xe2\x8b\xaa"),
+               ENTITY_DEF("mstpos", 8766, "\xe2\x88\xbe"),
+               ENTITY_DEF("Zopf", 8484, "\xe2\x84\xa4"),
+               ENTITY_DEF("dwangle", 10662, "\xe2\xa6\xa6"),
+               ENTITY_DEF("bowtie", 8904, "\xe2\x8b\x88"),
+               ENTITY_DEF("Dfr", 120071, "\xf0\x9d\x94\x87"),
+               ENTITY_DEF("iacute", 237, "\xc3\xad"),
+               ENTITY_DEF("njcy", 1114, "\xd1\x9a"),
+               ENTITY_DEF("cfr", 120096, "\xf0\x9d\x94\xa0"),
+               ENTITY_DEF("TripleDot", 8411, "\xe2\x83\x9b"),
+               ENTITY_DEF("Or", 10836, "\xe2\xa9\x94"),
+               ENTITY_DEF("blk34", 9619, "\xe2\x96\x93"),
+               ENTITY_DEF("equiv", 8801, "\xe2\x89\xa1"),
+               ENTITY_DEF("fflig", 64256, "\xef\xac\x80"),
+               ENTITY_DEF("Rang", 10219, "\xe2\x9f\xab"),
+               ENTITY_DEF("Wopf", 120142, "\xf0\x9d\x95\x8e"),
+               ENTITY_DEF("boxUl", 9564, "\xe2\x95\x9c"),
+               ENTITY_DEF("frac12", 189, "\xc2\xbd"),
+               ENTITY_DEF("clubs", 9827, "\xe2\x99\xa3"),
+               ENTITY_DEF("amalg", 10815, "\xe2\xa8\xbf"),
+               ENTITY_DEF("Lang", 10218, "\xe2\x9f\xaa"),
+               ENTITY_DEF("asymp", 8776, "\xe2\x89\x88"),
+               ENTITY_DEF("models", 8871, "\xe2\x8a\xa7"),
+               ENTITY_DEF("emptyset", 8709, "\xe2\x88\x85"),
+               ENTITY_DEF("Tscr", 119983, "\xf0\x9d\x92\xaf"),
+               ENTITY_DEF("nleftarrow", 8602, "\xe2\x86\x9a"),
+               ENTITY_DEF("Omacr", 332, "\xc5\x8c"),
+               ENTITY_DEF("gtrarr", 10616, "\xe2\xa5\xb8"),
+               ENTITY_DEF("excl", 33, "\x21"),
+               ENTITY_DEF("rarrw", 8605, "\xe2\x86\x9d"),
+               ENTITY_DEF("abreve", 259, "\xc4\x83"),
+               ENTITY_DEF("CircleTimes", 8855, "\xe2\x8a\x97"),
+               ENTITY_DEF("aopf", 120146, "\xf0\x9d\x95\x92"),
+               ENTITY_DEF("eqvparsl", 10725, "\xe2\xa7\xa5"),
+               ENTITY_DEF("boxv", 9474, "\xe2\x94\x82"),
+               ENTITY_DEF("SuchThat", 8715, "\xe2\x88\x8b"),
+               ENTITY_DEF("varphi", 981, "\xcf\x95"),
+               ENTITY_DEF("Ropf", 8477, "\xe2\x84\x9d"),
+               ENTITY_DEF("rscr", 120007, "\xf0\x9d\x93\x87"),
+               ENTITY_DEF("Rrightarrow", 8667, "\xe2\x87\x9b"),
+               ENTITY_DEF("equest", 8799, "\xe2\x89\x9f"),
+               ENTITY_DEF("ntilde", 241, "\xc3\xb1"),
+               ENTITY_DEF("Escr", 8496, "\xe2\x84\xb0"),
+               ENTITY_DEF("Lopf", 120131, "\xf0\x9d\x95\x83"),
+               ENTITY_DEF("GreaterGreater", 10914, "\xe2\xaa\xa2"),
+               ENTITY_DEF("pluscir", 10786, "\xe2\xa8\xa2"),
+               ENTITY_DEF("nsupset", 8835, "\xe2\x8a\x83\xe2\x83\x92"),
+               ENTITY_DEF("uArr", 8657, "\xe2\x87\x91"),
+               ENTITY_DEF("nwarhk", 10531, "\xe2\xa4\xa3"),
+               ENTITY_DEF("Ycirc", 374, "\xc5\xb6"),
+               ENTITY_DEF("tdot", 8411, "\xe2\x83\x9b"),
+               ENTITY_DEF("circledS", 9416, "\xe2\x93\x88"),
+               ENTITY_DEF("lhard", 8637, "\xe2\x86\xbd"),
+               ENTITY_DEF("iukcy", 1110, "\xd1\x96"),
+               ENTITY_DEF("PrecedesSlantEqual", 8828, "\xe2\x89\xbc"),
+               ENTITY_DEF("Sfr", 120086, "\xf0\x9d\x94\x96"),
+               ENTITY_DEF("egs", 10902, "\xe2\xaa\x96"),
+               ENTITY_DEF("oelig", 339, "\xc5\x93"),
+               ENTITY_DEF("bigtriangledown", 9661, "\xe2\x96\xbd"),
+               ENTITY_DEF("EmptyVerySmallSquare", 9643, "\xe2\x96\xab"),
+               ENTITY_DEF("Backslash", 8726, "\xe2\x88\x96"),
+               ENTITY_DEF("nscr", 120003, "\xf0\x9d\x93\x83"),
+               ENTITY_DEF("uogon", 371, "\xc5\xb3"),
+               ENTITY_DEF("circeq", 8791, "\xe2\x89\x97"),
+               ENTITY_DEF("check", 10003, "\xe2\x9c\x93"),
+               ENTITY_DEF("Sup", 8913, "\xe2\x8b\x91"),
+               ENTITY_DEF("Rcaron", 344, "\xc5\x98"),
+               ENTITY_DEF("lneqq", 8808, "\xe2\x89\xa8"),
+               ENTITY_DEF("lrhar", 8651, "\xe2\x87\x8b"),
+               ENTITY_DEF("ulcorn", 8988, "\xe2\x8c\x9c"),
+               ENTITY_DEF("timesd", 10800, "\xe2\xa8\xb0"),
+               ENTITY_DEF("Sum", 8721, "\xe2\x88\x91"),
+               ENTITY_DEF("varpropto", 8733, "\xe2\x88\x9d"),
+               ENTITY_DEF("Lcaron", 317, "\xc4\xbd"),
+               ENTITY_DEF("lbrkslu", 10637, "\xe2\xa6\x8d"),
+               ENTITY_DEF("AElig", 198, "\xc3\x86"),
+               ENTITY_DEF("varr", 8597, "\xe2\x86\x95"),
+               ENTITY_DEF("nvinfin", 10718, "\xe2\xa7\x9e"),
+               ENTITY_DEF("leq", 8804, "\xe2\x89\xa4"),
+               ENTITY_DEF("biguplus", 10756, "\xe2\xa8\x84"),
+               ENTITY_DEF("rpar", 41, "\x29"),
+               ENTITY_DEF("eng", 331, "\xc5\x8b"),
+               ENTITY_DEF("NegativeThinSpace", 8203, "\xe2\x80\x8b"),
+               ENTITY_DEF("lesssim", 8818, "\xe2\x89\xb2"),
+               ENTITY_DEF("lBarr", 10510, "\xe2\xa4\x8e"),
+               ENTITY_DEF("LeftUpTeeVector", 10592, "\xe2\xa5\xa0"),
+               ENTITY_DEF("gnE", 8809, "\xe2\x89\xa9"),
+               ENTITY_DEF("efr", 120098, "\xf0\x9d\x94\xa2"),
+               ENTITY_DEF("barvee", 8893, "\xe2\x8a\xbd"),
+               ENTITY_DEF("ee", 8519, "\xe2\x85\x87"),
+               ENTITY_DEF("Uogon", 370, "\xc5\xb2"),
+               ENTITY_DEF("gnapprox", 10890, "\xe2\xaa\x8a"),
+               ENTITY_DEF("olcir", 10686, "\xe2\xa6\xbe"),
+               ENTITY_DEF("boxUL", 9565, "\xe2\x95\x9d"),
+               ENTITY_DEF("Gg", 8921, "\xe2\x8b\x99"),
+               ENTITY_DEF("CloseCurlyQuote", 8217, "\xe2\x80\x99"),
+               ENTITY_DEF("leftharpoondown", 8637, "\xe2\x86\xbd"),
+               ENTITY_DEF("vfr", 120115, "\xf0\x9d\x94\xb3"),
+               ENTITY_DEF("gvertneqq", 8809, "\xe2\x89\xa9\xef\xb8\x80"),
+               ENTITY_DEF("ouml", 246, "\xc3\xb6"),
+               ENTITY_DEF("raemptyv", 10675, "\xe2\xa6\xb3"),
+               ENTITY_DEF("Zcaron", 381, "\xc5\xbd"),
+               ENTITY_DEF("scE", 10932, "\xe2\xaa\xb4"),
+               ENTITY_DEF("boxvh", 9532, "\xe2\x94\xbc"),
+               ENTITY_DEF("ominus", 8854, "\xe2\x8a\x96"),
+               ENTITY_DEF("oopf", 120160, "\xf0\x9d\x95\xa0"),
+               ENTITY_DEF("nsucceq", 10928, "\xe2\xaa\xb0\xcc\xb8"),
+               ENTITY_DEF("RBarr", 10512, "\xe2\xa4\x90"),
+               ENTITY_DEF("iprod", 10812, "\xe2\xa8\xbc"),
+               ENTITY_DEF("lvnE", 8808, "\xe2\x89\xa8\xef\xb8\x80"),
+               ENTITY_DEF("andand", 10837, "\xe2\xa9\x95"),
+               ENTITY_DEF("upharpoonright", 8638, "\xe2\x86\xbe"),
+               ENTITY_DEF("ncongdot", 10861, "\xe2\xa9\xad\xcc\xb8"),
+               ENTITY_DEF("drcrop", 8972, "\xe2\x8c\x8c"),
+               ENTITY_DEF("nsimeq", 8772, "\xe2\x89\x84"),
+               ENTITY_DEF("subsub", 10965, "\xe2\xab\x95"),
+               ENTITY_DEF("hardcy", 1098, "\xd1\x8a"),
+               ENTITY_DEF("leqslant", 10877, "\xe2\xa9\xbd"),
+               ENTITY_DEF("uharl", 8639, "\xe2\x86\xbf"),
+               ENTITY_DEF("expectation", 8496, "\xe2\x84\xb0"),
+               ENTITY_DEF("mdash", 8212, "\xe2\x80\x94"),
+               ENTITY_DEF("VerticalTilde", 8768, "\xe2\x89\x80"),
+               ENTITY_DEF("rdldhar", 10601, "\xe2\xa5\xa9"),
+               ENTITY_DEF("leftharpoonup", 8636, "\xe2\x86\xbc"),
+               ENTITY_DEF("mu", 956, "\xce\xbc"),
+               ENTITY_DEF("curarrm", 10556, "\xe2\xa4\xbc"),
+               ENTITY_DEF("Cdot", 266, "\xc4\x8a"),
+               ENTITY_DEF("NotTildeTilde", 8777, "\xe2\x89\x89"),
+               ENTITY_DEF("boxul", 9496, "\xe2\x94\x98"),
+               ENTITY_DEF("planckh", 8462, "\xe2\x84\x8e"),
+               ENTITY_DEF("CapitalDifferentialD", 8517, "\xe2\x85\x85"),
+               ENTITY_DEF("boxDL", 9559, "\xe2\x95\x97"),
+               ENTITY_DEF("cupbrcap", 10824, "\xe2\xa9\x88"),
+               ENTITY_DEF("boxdL", 9557, "\xe2\x95\x95"),
+               ENTITY_DEF("supe", 8839, "\xe2\x8a\x87"),
+               ENTITY_DEF("nvlt", 60, "\x3c\xe2\x83\x92"),
+               ENTITY_DEF("par", 8741, "\xe2\x88\xa5"),
+               ENTITY_DEF("InvisibleComma", 8291, "\xe2\x81\xa3"),
+               ENTITY_DEF("ring", 730, "\xcb\x9a"),
+               ENTITY_DEF("nvap", 8781, "\xe2\x89\x8d\xe2\x83\x92"),
+               ENTITY_DEF("veeeq", 8794, "\xe2\x89\x9a"),
+               ENTITY_DEF("Hfr", 8460, "\xe2\x84\x8c"),
+               ENTITY_DEF("dstrok", 273, "\xc4\x91"),
+               ENTITY_DEF("gesles", 10900, "\xe2\xaa\x94"),
+               ENTITY_DEF("dash", 8208, "\xe2\x80\x90"),
+               ENTITY_DEF("SHcy", 1064, "\xd0\xa8"),
+               ENTITY_DEF("congdot", 10861, "\xe2\xa9\xad"),
+               ENTITY_DEF("imagline", 8464, "\xe2\x84\x90"),
+               ENTITY_DEF("ncy", 1085, "\xd0\xbd"),
+               ENTITY_DEF("bigstar", 9733, "\xe2\x98\x85"),
+               ENTITY_DEF("REG", 174, "\xc2\xae"),
+               ENTITY_DEF("triangleq", 8796, "\xe2\x89\x9c"),
+               ENTITY_DEF("rsqb", 93, "\x5d"),
+               ENTITY_DEF("ddarr", 8650, "\xe2\x87\x8a"),
+               ENTITY_DEF("csub", 10959, "\xe2\xab\x8f"),
+               ENTITY_DEF("quest", 63, "\x3f"),
+               ENTITY_DEF("Star", 8902, "\xe2\x8b\x86"),
+               ENTITY_DEF("LT", 60, "\x3c"),
+               ENTITY_DEF("ncong", 8775, "\xe2\x89\x87"),
+               ENTITY_DEF("prnE", 10933, "\xe2\xaa\xb5"),
+               ENTITY_DEF("bigtriangleup", 9651, "\xe2\x96\xb3"),
+               ENTITY_DEF("Tilde", 8764, "\xe2\x88\xbc"),
+               ENTITY_DEF("ltrif", 9666, "\xe2\x97\x82"),
+               ENTITY_DEF("ldrdhar", 10599, "\xe2\xa5\xa7"),
+               ENTITY_DEF("lcaron", 318, "\xc4\xbe"),
+               ENTITY_DEF("equivDD", 10872, "\xe2\xa9\xb8"),
+               ENTITY_DEF("lHar", 10594, "\xe2\xa5\xa2"),
+               ENTITY_DEF("vBar", 10984, "\xe2\xab\xa8"),
+               ENTITY_DEF("Mopf", 120132, "\xf0\x9d\x95\x84"),
+               ENTITY_DEF("LeftArrow", 8592, "\xe2\x86\x90"),
+               ENTITY_DEF("Rho", 929, "\xce\xa1"),
+               ENTITY_DEF("Ccirc", 264, "\xc4\x88"),
+               ENTITY_DEF("ifr", 120102, "\xf0\x9d\x94\xa6"),
+               ENTITY_DEF("cacute", 263, "\xc4\x87"),
+               ENTITY_DEF("centerdot", 183, "\xc2\xb7"),
+               ENTITY_DEF("dollar", 36, "\x24"),
+               ENTITY_DEF("lang", 10216, "\xe2\x9f\xa8"),
+               ENTITY_DEF("curvearrowright", 8631, "\xe2\x86\xb7"),
+               ENTITY_DEF("half", 189, "\xc2\xbd"),
+               ENTITY_DEF("Ecy", 1069, "\xd0\xad"),
+               ENTITY_DEF("rcub", 125, "\x7d"),
+               ENTITY_DEF("rcy", 1088, "\xd1\x80"),
+               ENTITY_DEF("isins", 8948, "\xe2\x8b\xb4"),
+               ENTITY_DEF("bsolhsub", 10184, "\xe2\x9f\x88"),
+               ENTITY_DEF("boxuL", 9563, "\xe2\x95\x9b"),
+               ENTITY_DEF("shchcy", 1097, "\xd1\x89"),
+               ENTITY_DEF("cwconint", 8754, "\xe2\x88\xb2"),
+               ENTITY_DEF("euro", 8364, "\xe2\x82\xac"),
+               ENTITY_DEF("lesseqqgtr", 10891, "\xe2\xaa\x8b"),
+               ENTITY_DEF("sim", 8764, "\xe2\x88\xbc"),
+               ENTITY_DEF("rarrc", 10547, "\xe2\xa4\xb3"),
+               ENTITY_DEF("boxdl", 9488, "\xe2\x94\x90"),
+               ENTITY_DEF("Epsilon", 917, "\xce\x95"),
+               ENTITY_DEF("iiiint", 10764, "\xe2\xa8\x8c"),
+               ENTITY_DEF("Rightarrow", 8658, "\xe2\x87\x92"),
+               ENTITY_DEF("conint", 8750, "\xe2\x88\xae"),
+               ENTITY_DEF("boxDl", 9558, "\xe2\x95\x96"),
+               ENTITY_DEF("kappav", 1008, "\xcf\xb0"),
+               ENTITY_DEF("profsurf", 8979, "\xe2\x8c\x93"),
+               ENTITY_DEF("auml", 228, "\xc3\xa4"),
+               ENTITY_DEF("heartsuit", 9829, "\xe2\x99\xa5"),
+               ENTITY_DEF("eacute", 233, "\xc3\xa9"),
+               ENTITY_DEF("gt", 62, "\x3e"),
+               ENTITY_DEF("Gcedil", 290, "\xc4\xa2"),
+               ENTITY_DEF("easter", 10862, "\xe2\xa9\xae"),
+               ENTITY_DEF("Tcy", 1058, "\xd0\xa2"),
+               ENTITY_DEF("swarrow", 8601, "\xe2\x86\x99"),
+               ENTITY_DEF("lopf", 120157, "\xf0\x9d\x95\x9d"),
+               ENTITY_DEF("Agrave", 192, "\xc3\x80"),
+               ENTITY_DEF("Aring", 197, "\xc3\x85"),
+               ENTITY_DEF("fpartint", 10765, "\xe2\xa8\x8d"),
+               ENTITY_DEF("xoplus", 10753, "\xe2\xa8\x81"),
+               ENTITY_DEF("LeftDownTeeVector", 10593, "\xe2\xa5\xa1"),
+               ENTITY_DEF("int", 8747, "\xe2\x88\xab"),
+               ENTITY_DEF("Zeta", 918, "\xce\x96"),
+               ENTITY_DEF("loz", 9674, "\xe2\x97\x8a"),
+               ENTITY_DEF("ncup", 10818, "\xe2\xa9\x82"),
+               ENTITY_DEF("napE", 10864, "\xe2\xa9\xb0\xcc\xb8"),
+               ENTITY_DEF("csup", 10960, "\xe2\xab\x90"),
+               ENTITY_DEF("Ncedil", 325, "\xc5\x85"),
+               ENTITY_DEF("cuwed", 8911, "\xe2\x8b\x8f"),
+               ENTITY_DEF("Dot", 168, "\xc2\xa8"),
+               ENTITY_DEF("SquareIntersection", 8851, "\xe2\x8a\x93"),
+               ENTITY_DEF("map", 8614, "\xe2\x86\xa6"),
+               ENTITY_DEF("aelig", 230, "\xc3\xa6"),
+               ENTITY_DEF("RightArrow", 8594, "\xe2\x86\x92"),
+               ENTITY_DEF("rightharpoondown", 8641, "\xe2\x87\x81"),
+               ENTITY_DEF("bNot", 10989, "\xe2\xab\xad"),
+               ENTITY_DEF("nsccue", 8929, "\xe2\x8b\xa1"),
+               ENTITY_DEF("zigrarr", 8669, "\xe2\x87\x9d"),
+               ENTITY_DEF("Sacute", 346, "\xc5\x9a"),
+               ENTITY_DEF("orv", 10843, "\xe2\xa9\x9b"),
+               ENTITY_DEF("RightVectorBar", 10579, "\xe2\xa5\x93"),
+               ENTITY_DEF("nrarrw", 8605, "\xe2\x86\x9d\xcc\xb8"),
+               ENTITY_DEF("nbump", 8782, "\xe2\x89\x8e\xcc\xb8"),
+               ENTITY_DEF("iquest", 191, "\xc2\xbf"),
+               ENTITY_DEF("wr", 8768, "\xe2\x89\x80"),
+               ENTITY_DEF("UpArrow", 8593, "\xe2\x86\x91"),
+               ENTITY_DEF("notinva", 8713, "\xe2\x88\x89"),
+               ENTITY_DEF("ddagger", 8225, "\xe2\x80\xa1"),
+               ENTITY_DEF("nLeftarrow", 8653, "\xe2\x87\x8d"),
+               ENTITY_DEF("rbbrk", 10099, "\xe2\x9d\xb3"),
+               ENTITY_DEF("RightTriangle", 8883, "\xe2\x8a\xb3"),
+               ENTITY_DEF("leqq", 8806, "\xe2\x89\xa6"),
+               ENTITY_DEF("Vert", 8214, "\xe2\x80\x96"),
+               ENTITY_DEF("gesl", 8923, "\xe2\x8b\x9b\xef\xb8\x80"),
+               ENTITY_DEF("LeftTeeVector", 10586, "\xe2\xa5\x9a"),
+               ENTITY_DEF("Union", 8899, "\xe2\x8b\x83"),
+               ENTITY_DEF("sc", 8827, "\xe2\x89\xbb"),
+               ENTITY_DEF("ofr", 120108, "\xf0\x9d\x94\xac"),
+               ENTITY_DEF("quatint", 10774, "\xe2\xa8\x96"),
+               ENTITY_DEF("apacir", 10863, "\xe2\xa9\xaf"),
+               ENTITY_DEF("profalar", 9006, "\xe2\x8c\xae"),
+               ENTITY_DEF("subsetneq", 8842, "\xe2\x8a\x8a"),
+               ENTITY_DEF("Vvdash", 8874, "\xe2\x8a\xaa"),
+               ENTITY_DEF("ohbar", 10677, "\xe2\xa6\xb5"),
+               ENTITY_DEF("Gt", 8811, "\xe2\x89\xab"),
+               ENTITY_DEF("exist", 8707, "\xe2\x88\x83"),
+               ENTITY_DEF("gtrapprox", 10886, "\xe2\xaa\x86"),
+               ENTITY_DEF("euml", 235, "\xc3\xab"),
+               ENTITY_DEF("Equilibrium", 8652, "\xe2\x87\x8c"),
+               ENTITY_DEF("aacute", 225, "\xc3\xa1"),
+               ENTITY_DEF("omid", 10678, "\xe2\xa6\xb6"),
+               ENTITY_DEF("loarr", 8701, "\xe2\x87\xbd"),
+               ENTITY_DEF("SucceedsSlantEqual", 8829, "\xe2\x89\xbd"),
+               ENTITY_DEF("angsph", 8738, "\xe2\x88\xa2"),
+               ENTITY_DEF("nsmid", 8740, "\xe2\x88\xa4"),
+               ENTITY_DEF("lsquor", 8218, "\xe2\x80\x9a"),
+               ENTITY_DEF("cemptyv", 10674, "\xe2\xa6\xb2"),
+               ENTITY_DEF("rAarr", 8667, "\xe2\x87\x9b"),
+               ENTITY_DEF("searr", 8600, "\xe2\x86\x98"),
+               ENTITY_DEF("complexes", 8450, "\xe2\x84\x82"),
+               ENTITY_DEF("UnderParenthesis", 9181, "\xe2\x8f\x9d"),
+               ENTITY_DEF("nparsl", 11005, "\xe2\xab\xbd\xe2\x83\xa5"),
+               ENTITY_DEF("Lacute", 313, "\xc4\xb9"),
+               ENTITY_DEF("deg", 176, "\xc2\xb0"),
+               ENTITY_DEF("Racute", 340, "\xc5\x94"),
+               ENTITY_DEF("Verbar", 8214, "\xe2\x80\x96"),
+               ENTITY_DEF("sqcups", 8852, "\xe2\x8a\x94\xef\xb8\x80"),
+               ENTITY_DEF("Hopf", 8461, "\xe2\x84\x8d"),
+               ENTITY_DEF("naturals", 8469, "\xe2\x84\x95"),
+               ENTITY_DEF("Cedilla", 184, "\xc2\xb8"),
+               ENTITY_DEF("exponentiale", 8519, "\xe2\x85\x87"),
+               ENTITY_DEF("vnsup", 8835, "\xe2\x8a\x83\xe2\x83\x92"),
+               ENTITY_DEF("leftrightarrows", 8646, "\xe2\x87\x86"),
+               ENTITY_DEF("Laplacetrf", 8466, "\xe2\x84\x92"),
+               ENTITY_DEF("vartriangleright", 8883, "\xe2\x8a\xb3"),
+               ENTITY_DEF("rtri", 9657, "\xe2\x96\xb9"),
+               ENTITY_DEF("gE", 8807, "\xe2\x89\xa7"),
+               ENTITY_DEF("SmallCircle", 8728, "\xe2\x88\x98"),
+               ENTITY_DEF("diamondsuit", 9830, "\xe2\x99\xa6"),
+               ENTITY_DEF("Otilde", 213, "\xc3\x95"),
+               ENTITY_DEF("lneq", 10887, "\xe2\xaa\x87"),
+               ENTITY_DEF("lesdoto", 10881, "\xe2\xaa\x81"),
+               ENTITY_DEF("ltquest", 10875, "\xe2\xa9\xbb"),
+               ENTITY_DEF("thinsp", 8201, "\xe2\x80\x89"),
+               ENTITY_DEF("barwed", 8965, "\xe2\x8c\x85"),
+               ENTITY_DEF("elsdot", 10903, "\xe2\xaa\x97"),
+               ENTITY_DEF("circ", 710, "\xcb\x86"),
+               ENTITY_DEF("ni", 8715, "\xe2\x88\x8b"),
+               ENTITY_DEF("mlcp", 10971, "\xe2\xab\x9b"),
+               ENTITY_DEF("Vdash", 8873, "\xe2\x8a\xa9"),
+               ENTITY_DEF("ShortRightArrow", 8594, "\xe2\x86\x92"),
+               ENTITY_DEF("upharpoonleft", 8639, "\xe2\x86\xbf"),
+               ENTITY_DEF("UnderBracket", 9141, "\xe2\x8e\xb5"),
+               ENTITY_DEF("rAtail", 10524, "\xe2\xa4\x9c"),
+               ENTITY_DEF("iopf", 120154, "\xf0\x9d\x95\x9a"),
+               ENTITY_DEF("longleftarrow", 10229, "\xe2\x9f\xb5"),
+               ENTITY_DEF("Zacute", 377, "\xc5\xb9"),
+               ENTITY_DEF("duhar", 10607, "\xe2\xa5\xaf"),
+               ENTITY_DEF("Mfr", 120080, "\xf0\x9d\x94\x90"),
+               ENTITY_DEF("prnap", 10937, "\xe2\xaa\xb9"),
+               ENTITY_DEF("eqcirc", 8790, "\xe2\x89\x96"),
+               ENTITY_DEF("rarrlp", 8620, "\xe2\x86\xac"),
+               ENTITY_DEF("le", 8804, "\xe2\x89\xa4"),
+               ENTITY_DEF("Oscr", 119978, "\xf0\x9d\x92\xaa"),
+               ENTITY_DEF("langd", 10641, "\xe2\xa6\x91"),
+               ENTITY_DEF("Ucirc", 219, "\xc3\x9b"),
+               ENTITY_DEF("precnapprox", 10937, "\xe2\xaa\xb9"),
+               ENTITY_DEF("succcurlyeq", 8829, "\xe2\x89\xbd"),
+               ENTITY_DEF("Tau", 932, "\xce\xa4"),
+               ENTITY_DEF("larr", 8592, "\xe2\x86\x90"),
+               ENTITY_DEF("neArr", 8663, "\xe2\x87\x97"),
+               ENTITY_DEF("subsim", 10951, "\xe2\xab\x87"),
+               ENTITY_DEF("DScy", 1029, "\xd0\x85"),
+               ENTITY_DEF("preccurlyeq", 8828, "\xe2\x89\xbc"),
+               ENTITY_DEF("NotLessLess", 8810, "\xe2\x89\xaa\xcc\xb8"),
+               ENTITY_DEF("succnapprox", 10938, "\xe2\xaa\xba"),
+               ENTITY_DEF("prcue", 8828, "\xe2\x89\xbc"),
+               ENTITY_DEF("Downarrow", 8659, "\xe2\x87\x93"),
+               ENTITY_DEF("angmsdah", 10671, "\xe2\xa6\xaf"),
+               ENTITY_DEF("Emacr", 274, "\xc4\x92"),
+               ENTITY_DEF("lsh", 8624, "\xe2\x86\xb0"),
+               ENTITY_DEF("simne", 8774, "\xe2\x89\x86"),
+               ENTITY_DEF("Bumpeq", 8782, "\xe2\x89\x8e"),
+               ENTITY_DEF("RightUpTeeVector", 10588, "\xe2\xa5\x9c"),
+               ENTITY_DEF("Sigma", 931, "\xce\xa3"),
+               ENTITY_DEF("nvltrie", 8884, "\xe2\x8a\xb4\xe2\x83\x92"),
+               ENTITY_DEF("lfr", 120105, "\xf0\x9d\x94\xa9"),
+               ENTITY_DEF("emsp13", 8196, "\xe2\x80\x84"),
+               ENTITY_DEF("parsl", 11005, "\xe2\xab\xbd"),
+               ENTITY_DEF("ucirc", 251, "\xc3\xbb"),
+               ENTITY_DEF("gsiml", 10896, "\xe2\xaa\x90"),
+               ENTITY_DEF("xsqcup", 10758, "\xe2\xa8\x86"),
+               ENTITY_DEF("Omicron", 927, "\xce\x9f"),
+               ENTITY_DEF("gsime", 10894, "\xe2\xaa\x8e"),
+               ENTITY_DEF("circlearrowleft", 8634, "\xe2\x86\xba"),
+               ENTITY_DEF("sqsupe", 8850, "\xe2\x8a\x92"),
+               ENTITY_DEF("supE", 10950, "\xe2\xab\x86"),
+               ENTITY_DEF("dlcrop", 8973, "\xe2\x8c\x8d"),
+               ENTITY_DEF("RightDownTeeVector", 10589, "\xe2\xa5\x9d"),
+               ENTITY_DEF("Colone", 10868, "\xe2\xa9\xb4"),
+               ENTITY_DEF("awconint", 8755, "\xe2\x88\xb3"),
+               ENTITY_DEF("smte", 10924, "\xe2\xaa\xac"),
+               ENTITY_DEF("lEg", 10891, "\xe2\xaa\x8b"),
+               ENTITY_DEF("circledast", 8859, "\xe2\x8a\x9b"),
+               ENTITY_DEF("ecolon", 8789, "\xe2\x89\x95"),
+               ENTITY_DEF("rect", 9645, "\xe2\x96\xad"),
+               ENTITY_DEF("Equal", 10869, "\xe2\xa9\xb5"),
+               ENTITY_DEF("nwnear", 10535, "\xe2\xa4\xa7"),
+               ENTITY_DEF("capdot", 10816, "\xe2\xa9\x80"),
+               ENTITY_DEF("straightphi", 981, "\xcf\x95"),
+               ENTITY_DEF("forkv", 10969, "\xe2\xab\x99"),
+               ENTITY_DEF("ZHcy", 1046, "\xd0\x96"),
+               ENTITY_DEF("Element", 8712, "\xe2\x88\x88"),
+               ENTITY_DEF("rthree", 8908, "\xe2\x8b\x8c"),
+               ENTITY_DEF("vzigzag", 10650, "\xe2\xa6\x9a"),
+               ENTITY_DEF("hybull", 8259, "\xe2\x81\x83"),
+               ENTITY_DEF("intprod", 10812, "\xe2\xa8\xbc"),
+               ENTITY_DEF("HumpEqual", 8783, "\xe2\x89\x8f"),
+               ENTITY_DEF("bigsqcup", 10758, "\xe2\xa8\x86"),
+               ENTITY_DEF("mp", 8723, "\xe2\x88\x93"),
+               ENTITY_DEF("lescc", 10920, "\xe2\xaa\xa8"),
+               ENTITY_DEF("NotPrecedes", 8832, "\xe2\x8a\x80"),
+               ENTITY_DEF("wedge", 8743, "\xe2\x88\xa7"),
+               ENTITY_DEF("Supset", 8913, "\xe2\x8b\x91"),
+               ENTITY_DEF("pm", 177, "\xc2\xb1"),
+               ENTITY_DEF("kfr", 120104, "\xf0\x9d\x94\xa8"),
+               ENTITY_DEF("ufisht", 10622, "\xe2\xa5\xbe"),
+               ENTITY_DEF("ecaron", 283, "\xc4\x9b"),
+               ENTITY_DEF("chcy", 1095, "\xd1\x87"),
+               ENTITY_DEF("Esim", 10867, "\xe2\xa9\xb3"),
+               ENTITY_DEF("fltns", 9649, "\xe2\x96\xb1"),
+               ENTITY_DEF("nsce", 10928, "\xe2\xaa\xb0\xcc\xb8"),
+               ENTITY_DEF("hookrightarrow", 8618, "\xe2\x86\xaa"),
+               ENTITY_DEF("semi", 59, "\x3b"),
+               ENTITY_DEF("ges", 10878, "\xe2\xa9\xbe"),
+               ENTITY_DEF("approxeq", 8778, "\xe2\x89\x8a"),
+               ENTITY_DEF("rarrsim", 10612, "\xe2\xa5\xb4"),
+               ENTITY_DEF("boxhD", 9573, "\xe2\x95\xa5"),
+               ENTITY_DEF("varpi", 982, "\xcf\x96"),
+               ENTITY_DEF("larrb", 8676, "\xe2\x87\xa4"),
+               ENTITY_DEF("copf", 120148, "\xf0\x9d\x95\x94"),
+               ENTITY_DEF("Dopf", 120123, "\xf0\x9d\x94\xbb"),
+               ENTITY_DEF("LeftVector", 8636, "\xe2\x86\xbc"),
+               ENTITY_DEF("iff", 8660, "\xe2\x87\x94"),
+               ENTITY_DEF("lnap", 10889, "\xe2\xaa\x89"),
+               ENTITY_DEF("NotGreaterFullEqual", 8807, "\xe2\x89\xa7\xcc\xb8"),
+               ENTITY_DEF("varrho", 1009, "\xcf\xb1"),
+               ENTITY_DEF("NotSucceeds", 8833, "\xe2\x8a\x81"),
+               ENTITY_DEF("ltrPar", 10646, "\xe2\xa6\x96"),
+               ENTITY_DEF("nlE", 8806, "\xe2\x89\xa6\xcc\xb8"),
+               ENTITY_DEF("Zfr", 8488, "\xe2\x84\xa8"),
+               ENTITY_DEF("LeftArrowBar", 8676, "\xe2\x87\xa4"),
+               ENTITY_DEF("boxplus", 8862, "\xe2\x8a\x9e"),
+               ENTITY_DEF("sqsube", 8849, "\xe2\x8a\x91"),
+               ENTITY_DEF("Re", 8476, "\xe2\x84\x9c"),
+               ENTITY_DEF("Wfr", 120090, "\xf0\x9d\x94\x9a"),
+               ENTITY_DEF("epsi", 949, "\xce\xb5"),
+               ENTITY_DEF("oacute", 243, "\xc3\xb3"),
+               ENTITY_DEF("bdquo", 8222, "\xe2\x80\x9e"),
+               ENTITY_DEF("wscr", 120012, "\xf0\x9d\x93\x8c"),
+               ENTITY_DEF("bullet", 8226, "\xe2\x80\xa2"),
+               ENTITY_DEF("frown", 8994, "\xe2\x8c\xa2"),
+               ENTITY_DEF("siml", 10909, "\xe2\xaa\x9d"),
+               ENTITY_DEF("Rarr", 8608, "\xe2\x86\xa0"),
+               ENTITY_DEF("Scaron", 352, "\xc5\xa0"),
+               ENTITY_DEF("gtreqqless", 10892, "\xe2\xaa\x8c"),
+               ENTITY_DEF("Larr", 8606, "\xe2\x86\x9e"),
+               ENTITY_DEF("notniva", 8716, "\xe2\x88\x8c"),
+               ENTITY_DEF("gg", 8811, "\xe2\x89\xab"),
+               ENTITY_DEF("phmmat", 8499, "\xe2\x84\xb3"),
+               ENTITY_DEF("boxVL", 9571, "\xe2\x95\xa3"),
+               ENTITY_DEF("sigmav", 962, "\xcf\x82"),
+               ENTITY_DEF("order", 8500, "\xe2\x84\xb4"),
+               ENTITY_DEF("subsup", 10963, "\xe2\xab\x93"),
+               ENTITY_DEF("afr", 120094, "\xf0\x9d\x94\x9e"),
+               ENTITY_DEF("lbrace", 123, "\x7b"),
+               ENTITY_DEF("urcorn", 8989, "\xe2\x8c\x9d"),
+               ENTITY_DEF("Im", 8465, "\xe2\x84\x91"),
+               ENTITY_DEF("CounterClockwiseContourIntegral", 8755, "\xe2\x88\xb3"),
+               ENTITY_DEF("lne", 10887, "\xe2\xaa\x87"),
+               ENTITY_DEF("chi", 967, "\xcf\x87"),
+               ENTITY_DEF("cudarrl", 10552, "\xe2\xa4\xb8"),
+               ENTITY_DEF("ang", 8736, "\xe2\x88\xa0"),
+               ENTITY_DEF("isindot", 8949, "\xe2\x8b\xb5"),
+               ENTITY_DEF("Lfr", 120079, "\xf0\x9d\x94\x8f"),
+               ENTITY_DEF("Rsh", 8625, "\xe2\x86\xb1"),
+               ENTITY_DEF("Ocy", 1054, "\xd0\x9e"),
+               ENTITY_DEF("nvrArr", 10499, "\xe2\xa4\x83"),
+               ENTITY_DEF("otimes", 8855, "\xe2\x8a\x97"),
+               ENTITY_DEF("eqslantgtr", 10902, "\xe2\xaa\x96"),
+               ENTITY_DEF("Rfr", 8476, "\xe2\x84\x9c"),
+               ENTITY_DEF("blacktriangleleft", 9666, "\xe2\x97\x82"),
+               ENTITY_DEF("Lsh", 8624, "\xe2\x86\xb0"),
+               ENTITY_DEF("boxvr", 9500, "\xe2\x94\x9c"),
+               ENTITY_DEF("scedil", 351, "\xc5\x9f"),
+               ENTITY_DEF("iuml", 239, "\xc3\xaf"),
+               ENTITY_DEF("NJcy", 1034, "\xd0\x8a"),
+               ENTITY_DEF("Dagger", 8225, "\xe2\x80\xa1"),
+               ENTITY_DEF("rarrap", 10613, "\xe2\xa5\xb5"),
+               ENTITY_DEF("udblac", 369, "\xc5\xb1"),
+               ENTITY_DEF("Sopf", 120138, "\xf0\x9d\x95\x8a"),
+               ENTITY_DEF("scnsim", 8937, "\xe2\x8b\xa9"),
+               ENTITY_DEF("hbar", 8463, "\xe2\x84\x8f"),
+               ENTITY_DEF("frac15", 8533, "\xe2\x85\x95"),
+               ENTITY_DEF("sup3", 179, "\xc2\xb3"),
+               ENTITY_DEF("NegativeThickSpace", 8203, "\xe2\x80\x8b"),
+               ENTITY_DEF("npr", 8832, "\xe2\x8a\x80"),
+               ENTITY_DEF("doteq", 8784, "\xe2\x89\x90"),
+               ENTITY_DEF("subrarr", 10617, "\xe2\xa5\xb9"),
+               ENTITY_DEF("SquareSubset", 8847, "\xe2\x8a\x8f"),
+               ENTITY_DEF("vprop", 8733, "\xe2\x88\x9d"),
+               ENTITY_DEF("OpenCurlyQuote", 8216, "\xe2\x80\x98"),
+               ENTITY_DEF("supseteq", 8839, "\xe2\x8a\x87"),
+               ENTITY_DEF("nRightarrow", 8655, "\xe2\x87\x8f"),
+               ENTITY_DEF("Longleftarrow", 10232, "\xe2\x9f\xb8"),
+               ENTITY_DEF("lsquo", 8216, "\xe2\x80\x98"),
+               ENTITY_DEF("hstrok", 295, "\xc4\xa7"),
+               ENTITY_DEF("NotTilde", 8769, "\xe2\x89\x81"),
+               ENTITY_DEF("ogt", 10689, "\xe2\xa7\x81"),
+               ENTITY_DEF("block", 9608, "\xe2\x96\x88"),
+               ENTITY_DEF("minusd", 8760, "\xe2\x88\xb8"),
+               ENTITY_DEF("esdot", 8784, "\xe2\x89\x90"),
+               ENTITY_DEF("nsim", 8769, "\xe2\x89\x81"),
+               ENTITY_DEF("scsim", 8831, "\xe2\x89\xbf"),
+               ENTITY_DEF("boxVl", 9570, "\xe2\x95\xa2"),
+               ENTITY_DEF("ltimes", 8905, "\xe2\x8b\x89"),
+               ENTITY_DEF("thkap", 8776, "\xe2\x89\x88"),
+               ENTITY_DEF("vnsub", 8834, "\xe2\x8a\x82\xe2\x83\x92"),
+               ENTITY_DEF("thetasym", 977, "\xcf\x91"),
+               ENTITY_DEF("eopf", 120150, "\xf0\x9d\x95\x96"),
+               ENTITY_DEF("image", 8465, "\xe2\x84\x91"),
+               ENTITY_DEF("doteqdot", 8785, "\xe2\x89\x91"),
+               ENTITY_DEF("Udblac", 368, "\xc5\xb0"),
+               ENTITY_DEF("gnsim", 8935, "\xe2\x8b\xa7"),
+               ENTITY_DEF("yicy", 1111, "\xd1\x97"),
+               ENTITY_DEF("vopf", 120167, "\xf0\x9d\x95\xa7"),
+               ENTITY_DEF("DDotrahd", 10513, "\xe2\xa4\x91"),
+               ENTITY_DEF("Iota", 921, "\xce\x99"),
+               ENTITY_DEF("GJcy", 1027, "\xd0\x83"),
+               ENTITY_DEF("rightthreetimes", 8908, "\xe2\x8b\x8c"),
+               ENTITY_DEF("nrtri", 8939, "\xe2\x8b\xab"),
+               ENTITY_DEF("TildeFullEqual", 8773, "\xe2\x89\x85"),
+               ENTITY_DEF("Dcaron", 270, "\xc4\x8e"),
+               ENTITY_DEF("ccaron", 269, "\xc4\x8d"),
+               ENTITY_DEF("lacute", 314, "\xc4\xba"),
+               ENTITY_DEF("VerticalBar", 8739, "\xe2\x88\xa3"),
+               ENTITY_DEF("Igrave", 204, "\xc3\x8c"),
+               ENTITY_DEF("boxH", 9552, "\xe2\x95\x90"),
+               ENTITY_DEF("Pfr", 120083, "\xf0\x9d\x94\x93"),
+               ENTITY_DEF("equals", 61, "\x3d"),
+               ENTITY_DEF("rbrack", 93, "\x5d"),
+               ENTITY_DEF("OverParenthesis", 9180, "\xe2\x8f\x9c"),
+               ENTITY_DEF("in", 8712, "\xe2\x88\x88"),
+               ENTITY_DEF("llcorner", 8990, "\xe2\x8c\x9e"),
+               ENTITY_DEF("mcomma", 10793, "\xe2\xa8\xa9"),
+               ENTITY_DEF("NotGreater", 8815, "\xe2\x89\xaf"),
+               ENTITY_DEF("midcir", 10992, "\xe2\xab\xb0"),
+               ENTITY_DEF("Edot", 278, "\xc4\x96"),
+               ENTITY_DEF("oplus", 8853, "\xe2\x8a\x95"),
+               ENTITY_DEF("geqq", 8807, "\xe2\x89\xa7"),
+               ENTITY_DEF("curvearrowleft", 8630, "\xe2\x86\xb6"),
+               ENTITY_DEF("Poincareplane", 8460, "\xe2\x84\x8c"),
+               ENTITY_DEF("yscr", 120014, "\xf0\x9d\x93\x8e"),
+               ENTITY_DEF("ccaps", 10829, "\xe2\xa9\x8d"),
+               ENTITY_DEF("rpargt", 10644, "\xe2\xa6\x94"),
+               ENTITY_DEF("topfork", 10970, "\xe2\xab\x9a"),
+               ENTITY_DEF("Gamma", 915, "\xce\x93"),
+               ENTITY_DEF("umacr", 363, "\xc5\xab"),
+               ENTITY_DEF("frac13", 8531, "\xe2\x85\x93"),
+               ENTITY_DEF("cirfnint", 10768, "\xe2\xa8\x90"),
+               ENTITY_DEF("xlArr", 10232, "\xe2\x9f\xb8"),
+               ENTITY_DEF("digamma", 989, "\xcf\x9d"),
+               ENTITY_DEF("Hat", 94, "\x5e"),
+               ENTITY_DEF("lates", 10925, "\xe2\xaa\xad\xef\xb8\x80"),
+               ENTITY_DEF("lgE", 10897, "\xe2\xaa\x91"),
+               ENTITY_DEF("commat", 64, "\x40"),
+               ENTITY_DEF("NotPrecedesSlantEqual", 8928, "\xe2\x8b\xa0"),
+               ENTITY_DEF("phone", 9742, "\xe2\x98\x8e"),
+               ENTITY_DEF("Ecirc", 202, "\xc3\x8a"),
+               ENTITY_DEF("lt", 60, "\x3c"),
+               ENTITY_DEF("intcal", 8890, "\xe2\x8a\xba"),
+               ENTITY_DEF("xdtri", 9661, "\xe2\x96\xbd"),
+               ENTITY_DEF("Abreve", 258, "\xc4\x82"),
+               ENTITY_DEF("gopf", 120152, "\xf0\x9d\x95\x98"),
+               ENTITY_DEF("Xopf", 120143, "\xf0\x9d\x95\x8f"),
+               ENTITY_DEF("Iacute", 205, "\xc3\x8d"),
+               ENTITY_DEF("Aopf", 120120, "\xf0\x9d\x94\xb8"),
+               ENTITY_DEF("gbreve", 287, "\xc4\x9f"),
+               ENTITY_DEF("nleq", 8816, "\xe2\x89\xb0"),
+               ENTITY_DEF("xopf", 120169, "\xf0\x9d\x95\xa9"),
+               ENTITY_DEF("SquareSupersetEqual", 8850, "\xe2\x8a\x92"),
+               ENTITY_DEF("NotLessTilde", 8820, "\xe2\x89\xb4"),
+               ENTITY_DEF("SubsetEqual", 8838, "\xe2\x8a\x86"),
+               ENTITY_DEF("Sc", 10940, "\xe2\xaa\xbc"),
+               ENTITY_DEF("sdote", 10854, "\xe2\xa9\xa6"),
+               ENTITY_DEF("loplus", 10797, "\xe2\xa8\xad"),
+               ENTITY_DEF("zfr", 120119, "\xf0\x9d\x94\xb7"),
+               ENTITY_DEF("subseteqq", 10949, "\xe2\xab\x85"),
+               ENTITY_DEF("Vdashl", 10982, "\xe2\xab\xa6"),
+               ENTITY_DEF("integers", 8484, "\xe2\x84\xa4"),
+               ENTITY_DEF("Umacr", 362, "\xc5\xaa"),
+               ENTITY_DEF("dopf", 120149, "\xf0\x9d\x95\x95"),
+               ENTITY_DEF("RightDownVectorBar", 10581, "\xe2\xa5\x95"),
+               ENTITY_DEF("angmsdaf", 10669, "\xe2\xa6\xad"),
+               ENTITY_DEF("Jfr", 120077, "\xf0\x9d\x94\x8d"),
+               ENTITY_DEF("bernou", 8492, "\xe2\x84\xac"),
+               ENTITY_DEF("lceil", 8968, "\xe2\x8c\x88"),
+               ENTITY_DEF("nvsim", 8764, "\xe2\x88\xbc\xe2\x83\x92"),
+               ENTITY_DEF("NotSucceedsSlantEqual", 8929, "\xe2\x8b\xa1"),
+               ENTITY_DEF("hearts", 9829, "\xe2\x99\xa5"),
+               ENTITY_DEF("vee", 8744, "\xe2\x88\xa8"),
+               ENTITY_DEF("LJcy", 1033, "\xd0\x89"),
+               ENTITY_DEF("nlt", 8814, "\xe2\x89\xae"),
+               ENTITY_DEF("because", 8757, "\xe2\x88\xb5"),
+               ENTITY_DEF("hairsp", 8202, "\xe2\x80\x8a"),
+               ENTITY_DEF("comma", 44, "\x2c"),
+               ENTITY_DEF("iecy", 1077, "\xd0\xb5"),
+               ENTITY_DEF("npre", 10927, "\xe2\xaa\xaf\xcc\xb8"),
+               ENTITY_DEF("NotSquareSubset", 8847, "\xe2\x8a\x8f\xcc\xb8"),
+               ENTITY_DEF("mscr", 120002, "\xf0\x9d\x93\x82"),
+               ENTITY_DEF("jopf", 120155, "\xf0\x9d\x95\x9b"),
+               ENTITY_DEF("bumpE", 10926, "\xe2\xaa\xae"),
+               ENTITY_DEF("thicksim", 8764, "\xe2\x88\xbc"),
+               ENTITY_DEF("Nfr", 120081, "\xf0\x9d\x94\x91"),
+               ENTITY_DEF("yucy", 1102, "\xd1\x8e"),
+               ENTITY_DEF("notinvc", 8950, "\xe2\x8b\xb6"),
+               ENTITY_DEF("lstrok", 322, "\xc5\x82"),
+               ENTITY_DEF("robrk", 10215, "\xe2\x9f\xa7"),
+               ENTITY_DEF("LeftTriangleBar", 10703, "\xe2\xa7\x8f"),
+               ENTITY_DEF("hksearow", 10533, "\xe2\xa4\xa5"),
+               ENTITY_DEF("bigcap", 8898, "\xe2\x8b\x82"),
+               ENTITY_DEF("udhar", 10606, "\xe2\xa5\xae"),
+               ENTITY_DEF("Yscr", 119988, "\xf0\x9d\x92\xb4"),
+               ENTITY_DEF("smeparsl", 10724, "\xe2\xa7\xa4"),
+               ENTITY_DEF("NotLess", 8814, "\xe2\x89\xae"),
+               ENTITY_DEF("dcaron", 271, "\xc4\x8f"),
+               ENTITY_DEF("ange", 10660, "\xe2\xa6\xa4"),
+               ENTITY_DEF("dHar", 10597, "\xe2\xa5\xa5"),
+               ENTITY_DEF("UpperRightArrow", 8599, "\xe2\x86\x97"),
+               ENTITY_DEF("trpezium", 9186, "\xe2\x8f\xa2"),
+               ENTITY_DEF("boxminus", 8863, "\xe2\x8a\x9f"),
+               ENTITY_DEF("notni", 8716, "\xe2\x88\x8c"),
+               ENTITY_DEF("dtrif", 9662, "\xe2\x96\xbe"),
+               ENTITY_DEF("nhArr", 8654, "\xe2\x87\x8e"),
+               ENTITY_DEF("larrpl", 10553, "\xe2\xa4\xb9"),
+               ENTITY_DEF("simeq", 8771, "\xe2\x89\x83"),
+               ENTITY_DEF("geqslant", 10878, "\xe2\xa9\xbe"),
+               ENTITY_DEF("RightUpVectorBar", 10580, "\xe2\xa5\x94"),
+               ENTITY_DEF("nsc", 8833, "\xe2\x8a\x81"),
+               ENTITY_DEF("div", 247, "\xc3\xb7"),
+               ENTITY_DEF("orslope", 10839, "\xe2\xa9\x97"),
+               ENTITY_DEF("lparlt", 10643, "\xe2\xa6\x93"),
+               ENTITY_DEF("trie", 8796, "\xe2\x89\x9c"),
+               ENTITY_DEF("cirmid", 10991, "\xe2\xab\xaf"),
+               ENTITY_DEF("wp", 8472, "\xe2\x84\x98"),
+               ENTITY_DEF("dagger", 8224, "\xe2\x80\xa0"),
+               ENTITY_DEF("utri", 9653, "\xe2\x96\xb5"),
+               ENTITY_DEF("supnE", 10956, "\xe2\xab\x8c"),
+               ENTITY_DEF("eg", 10906, "\xe2\xaa\x9a"),
+               ENTITY_DEF("LeftDownVector", 8643, "\xe2\x87\x83"),
+               ENTITY_DEF("NotLessEqual", 8816, "\xe2\x89\xb0"),
+               ENTITY_DEF("Bopf", 120121, "\xf0\x9d\x94\xb9"),
+               ENTITY_DEF("LongLeftRightArrow", 10231, "\xe2\x9f\xb7"),
+               ENTITY_DEF("Gfr", 120074, "\xf0\x9d\x94\x8a"),
+               ENTITY_DEF("sqsubseteq", 8849, "\xe2\x8a\x91"),
+               ENTITY_DEF("ograve", 242, "\xc3\xb2"),
+               ENTITY_DEF("larrhk", 8617, "\xe2\x86\xa9"),
+               ENTITY_DEF("sigma", 963, "\xcf\x83"),
+               ENTITY_DEF("NotSquareSupersetEqual", 8931, "\xe2\x8b\xa3"),
+               ENTITY_DEF("gvnE", 8809, "\xe2\x89\xa9\xef\xb8\x80"),
+               ENTITY_DEF("timesbar", 10801, "\xe2\xa8\xb1"),
+               ENTITY_DEF("Iukcy", 1030, "\xd0\x86"),
+               ENTITY_DEF("bscr", 119991, "\xf0\x9d\x92\xb7"),
+               ENTITY_DEF("Exists", 8707, "\xe2\x88\x83"),
+               ENTITY_DEF("tscr", 120009, "\xf0\x9d\x93\x89"),
+               ENTITY_DEF("tcy", 1090, "\xd1\x82"),
+               ENTITY_DEF("nwarr", 8598, "\xe2\x86\x96"),
+               ENTITY_DEF("hoarr", 8703, "\xe2\x87\xbf"),
+               ENTITY_DEF("lnapprox", 10889, "\xe2\xaa\x89"),
+               ENTITY_DEF("nu", 957, "\xce\xbd"),
+               ENTITY_DEF("bcy", 1073, "\xd0\xb1"),
+               ENTITY_DEF("ndash", 8211, "\xe2\x80\x93"),
+               ENTITY_DEF("smt", 10922, "\xe2\xaa\xaa"),
+               ENTITY_DEF("scaron", 353, "\xc5\xa1"),
+               ENTITY_DEF("IOcy", 1025, "\xd0\x81"),
+               ENTITY_DEF("Ifr", 8465, "\xe2\x84\x91"),
+               ENTITY_DEF("cularrp", 10557, "\xe2\xa4\xbd"),
+               ENTITY_DEF("lvertneqq", 8808, "\xe2\x89\xa8\xef\xb8\x80"),
+               ENTITY_DEF("nlarr", 8602, "\xe2\x86\x9a"),
+               ENTITY_DEF("colon", 58, "\x3a"),
+               ENTITY_DEF("ddotseq", 10871, "\xe2\xa9\xb7"),
+               ENTITY_DEF("zacute", 378, "\xc5\xba"),
+               ENTITY_DEF("DoubleVerticalBar", 8741, "\xe2\x88\xa5"),
+               ENTITY_DEF("larrfs", 10525, "\xe2\xa4\x9d"),
+               ENTITY_DEF("NotExists", 8708, "\xe2\x88\x84"),
+               ENTITY_DEF("geq", 8805, "\xe2\x89\xa5"),
+               ENTITY_DEF("Ffr", 120073, "\xf0\x9d\x94\x89"),
+               ENTITY_DEF("divide", 247, "\xc3\xb7"),
+               ENTITY_DEF("blank", 9251, "\xe2\x90\xa3"),
+               ENTITY_DEF("IEcy", 1045, "\xd0\x95"),
+               ENTITY_DEF("ordm", 186, "\xc2\xba"),
+               ENTITY_DEF("fopf", 120151, "\xf0\x9d\x95\x97"),
+               ENTITY_DEF("ecir", 8790, "\xe2\x89\x96"),
+               ENTITY_DEF("complement", 8705, "\xe2\x88\x81"),
+               ENTITY_DEF("top", 8868, "\xe2\x8a\xa4"),
+               ENTITY_DEF("DoubleContourIntegral", 8751, "\xe2\x88\xaf"),
+               ENTITY_DEF("nisd", 8954, "\xe2\x8b\xba"),
+               ENTITY_DEF("bcong", 8780, "\xe2\x89\x8c"),
+               ENTITY_DEF("plusdu", 10789, "\xe2\xa8\xa5"),
+               ENTITY_DEF("TildeTilde", 8776, "\xe2\x89\x88"),
+               ENTITY_DEF("lnE", 8808, "\xe2\x89\xa8"),
+               ENTITY_DEF("DoubleLongRightArrow", 10233, "\xe2\x9f\xb9"),
+               ENTITY_DEF("nsubseteqq", 10949, "\xe2\xab\x85\xcc\xb8"),
+               ENTITY_DEF("DownTeeArrow", 8615, "\xe2\x86\xa7"),
+               ENTITY_DEF("Cscr", 119966, "\xf0\x9d\x92\x9e"),
+               ENTITY_DEF("NegativeVeryThinSpace", 8203, "\xe2\x80\x8b"),
+               ENTITY_DEF("emsp", 8195, "\xe2\x80\x83"),
+               ENTITY_DEF("vartriangleleft", 8882, "\xe2\x8a\xb2"),
+               ENTITY_DEF("ropar", 10630, "\xe2\xa6\x86"),
+               ENTITY_DEF("checkmark", 10003, "\xe2\x9c\x93"),
+               ENTITY_DEF("Ycy", 1067, "\xd0\xab"),
+               ENTITY_DEF("supset", 8835, "\xe2\x8a\x83"),
+               ENTITY_DEF("gneqq", 8809, "\xe2\x89\xa9"),
+               ENTITY_DEF("Lstrok", 321, "\xc5\x81"),
+               ENTITY_DEF("AMP", 38, "\x26"),
+               ENTITY_DEF("acE", 8766, "\xe2\x88\xbe\xcc\xb3"),
+               ENTITY_DEF("sqsupseteq", 8850, "\xe2\x8a\x92"),
+               ENTITY_DEF("nle", 8816, "\xe2\x89\xb0"),
+               ENTITY_DEF("nesear", 10536, "\xe2\xa4\xa8"),
+               ENTITY_DEF("LeftDownVectorBar", 10585, "\xe2\xa5\x99"),
+               ENTITY_DEF("Integral", 8747, "\xe2\x88\xab"),
+               ENTITY_DEF("Beta", 914, "\xce\x92"),
+               ENTITY_DEF("nvdash", 8876, "\xe2\x8a\xac"),
+               ENTITY_DEF("nges", 10878, "\xe2\xa9\xbe\xcc\xb8"),
+               ENTITY_DEF("demptyv", 10673, "\xe2\xa6\xb1"),
+               ENTITY_DEF("eta", 951, "\xce\xb7"),
+               ENTITY_DEF("GreaterSlantEqual", 10878, "\xe2\xa9\xbe"),
+               ENTITY_DEF("ccedil", 231, "\xc3\xa7"),
+               ENTITY_DEF("pfr", 120109, "\xf0\x9d\x94\xad"),
+               ENTITY_DEF("bbrktbrk", 9142, "\xe2\x8e\xb6"),
+               ENTITY_DEF("mcy", 1084, "\xd0\xbc"),
+               ENTITY_DEF("Not", 10988, "\xe2\xab\xac"),
+               ENTITY_DEF("qscr", 120006, "\xf0\x9d\x93\x86"),
+               ENTITY_DEF("zwj", 8205, "\xe2\x80\x8d"),
+               ENTITY_DEF("ntrianglerighteq", 8941, "\xe2\x8b\xad"),
+               ENTITY_DEF("permil", 8240, "\xe2\x80\xb0"),
+               ENTITY_DEF("squarf", 9642, "\xe2\x96\xaa"),
+               ENTITY_DEF("apos", 39, "\x27"),
+               ENTITY_DEF("lrm", 8206, "\xe2\x80\x8e"),
+               ENTITY_DEF("male", 9794, "\xe2\x99\x82"),
+               ENTITY_DEF("agrave", 224, "\xc3\xa0"),
+               ENTITY_DEF("Lt", 8810, "\xe2\x89\xaa"),
+               ENTITY_DEF("capand", 10820, "\xe2\xa9\x84"),
+               ENTITY_DEF("aring", 229, "\xc3\xa5"),
+               ENTITY_DEF("Jukcy", 1028, "\xd0\x84"),
+               ENTITY_DEF("bumpe", 8783, "\xe2\x89\x8f"),
+               ENTITY_DEF("dd", 8518, "\xe2\x85\x86"),
+               ENTITY_DEF("tscy", 1094, "\xd1\x86"),
+               ENTITY_DEF("oS", 9416, "\xe2\x93\x88"),
+               ENTITY_DEF("succeq", 10928, "\xe2\xaa\xb0"),
+               ENTITY_DEF("xharr", 10231, "\xe2\x9f\xb7"),
+               ENTITY_DEF("pluse", 10866, "\xe2\xa9\xb2"),
+               ENTITY_DEF("rfisht", 10621, "\xe2\xa5\xbd"),
+               ENTITY_DEF("HorizontalLine", 9472, "\xe2\x94\x80"),
+               ENTITY_DEF("DiacriticalAcute", 180, "\xc2\xb4"),
+               ENTITY_DEF("hfr", 120101, "\xf0\x9d\x94\xa5"),
+               ENTITY_DEF("preceq", 10927, "\xe2\xaa\xaf"),
+               ENTITY_DEF("rationals", 8474, "\xe2\x84\x9a"),
+               ENTITY_DEF("Auml", 196, "\xc3\x84"),
+               ENTITY_DEF("LeftRightArrow", 8596, "\xe2\x86\x94"),
+               ENTITY_DEF("blacktriangleright", 9656, "\xe2\x96\xb8"),
+               ENTITY_DEF("dharr", 8642, "\xe2\x87\x82"),
+               ENTITY_DEF("isin", 8712, "\xe2\x88\x88"),
+               ENTITY_DEF("ldrushar", 10571, "\xe2\xa5\x8b"),
+               ENTITY_DEF("squ", 9633, "\xe2\x96\xa1"),
+               ENTITY_DEF("rbrksld", 10638, "\xe2\xa6\x8e"),
+               ENTITY_DEF("bigwedge", 8896, "\xe2\x8b\x80"),
+               ENTITY_DEF("swArr", 8665, "\xe2\x87\x99"),
+               ENTITY_DEF("IJlig", 306, "\xc4\xb2"),
+               ENTITY_DEF("harr", 8596, "\xe2\x86\x94"),
+               ENTITY_DEF("range", 10661, "\xe2\xa6\xa5"),
+               ENTITY_DEF("urtri", 9721, "\xe2\x97\xb9"),
+               ENTITY_DEF("NotVerticalBar", 8740, "\xe2\x88\xa4"),
+               ENTITY_DEF("ic", 8291, "\xe2\x81\xa3"),
+               ENTITY_DEF("solbar", 9023, "\xe2\x8c\xbf"),
+               ENTITY_DEF("approx", 8776, "\xe2\x89\x88"),
+               ENTITY_DEF("SquareSuperset", 8848, "\xe2\x8a\x90"),
+               ENTITY_DEF("numsp", 8199, "\xe2\x80\x87"),
+               ENTITY_DEF("nLt", 8810, "\xe2\x89\xaa\xe2\x83\x92"),
+               ENTITY_DEF("tilde", 732, "\xcb\x9c"),
+               ENTITY_DEF("rlarr", 8644, "\xe2\x87\x84"),
+               ENTITY_DEF("langle", 10216, "\xe2\x9f\xa8"),
+               ENTITY_DEF("nleqslant", 10877, "\xe2\xa9\xbd\xcc\xb8"),
+               ENTITY_DEF("Nacute", 323, "\xc5\x83"),
+               ENTITY_DEF("NotLeftTriangle", 8938, "\xe2\x8b\xaa"),
+               ENTITY_DEF("sopf", 120164, "\xf0\x9d\x95\xa4"),
+               ENTITY_DEF("xmap", 10236, "\xe2\x9f\xbc"),
+               ENTITY_DEF("supne", 8843, "\xe2\x8a\x8b"),
+               ENTITY_DEF("Int", 8748, "\xe2\x88\xac"),
+               ENTITY_DEF("nsupseteqq", 10950, "\xe2\xab\x86\xcc\xb8"),
+               ENTITY_DEF("circlearrowright", 8635, "\xe2\x86\xbb"),
+               ENTITY_DEF("NotCongruent", 8802, "\xe2\x89\xa2"),
+               ENTITY_DEF("Scedil", 350, "\xc5\x9e"),
+               ENTITY_DEF("raquo", 187, "\xc2\xbb"),
+               ENTITY_DEF("ycy", 1099, "\xd1\x8b"),
+               ENTITY_DEF("notinvb", 8951, "\xe2\x8b\xb7"),
+               ENTITY_DEF("andv", 10842, "\xe2\xa9\x9a"),
+               ENTITY_DEF("nap", 8777, "\xe2\x89\x89"),
+               ENTITY_DEF("shcy", 1096, "\xd1\x88"),
+               ENTITY_DEF("ssetmn", 8726, "\xe2\x88\x96"),
+               ENTITY_DEF("downarrow", 8595, "\xe2\x86\x93"),
+               ENTITY_DEF("gesdotol", 10884, "\xe2\xaa\x84"),
+               ENTITY_DEF("Congruent", 8801, "\xe2\x89\xa1"),
+               ENTITY_DEF("pound", 163, "\xc2\xa3"),
+               ENTITY_DEF("ZeroWidthSpace", 8203, "\xe2\x80\x8b"),
+               ENTITY_DEF("rdca", 10551, "\xe2\xa4\xb7"),
+               ENTITY_DEF("rmoust", 9137, "\xe2\x8e\xb1"),
+               ENTITY_DEF("zcy", 1079, "\xd0\xb7"),
+               ENTITY_DEF("Square", 9633, "\xe2\x96\xa1"),
+               ENTITY_DEF("subE", 10949, "\xe2\xab\x85"),
+               ENTITY_DEF("infintie", 10717, "\xe2\xa7\x9d"),
+               ENTITY_DEF("Cayleys", 8493, "\xe2\x84\xad"),
+               ENTITY_DEF("lsaquo", 8249, "\xe2\x80\xb9"),
+               ENTITY_DEF("realpart", 8476, "\xe2\x84\x9c"),
+               ENTITY_DEF("nprec", 8832, "\xe2\x8a\x80"),
+               ENTITY_DEF("RightTriangleBar", 10704, "\xe2\xa7\x90"),
+               ENTITY_DEF("Kopf", 120130, "\xf0\x9d\x95\x82"),
+               ENTITY_DEF("Ubreve", 364, "\xc5\xac"),
+               ENTITY_DEF("Uopf", 120140, "\xf0\x9d\x95\x8c"),
+               ENTITY_DEF("trianglelefteq", 8884, "\xe2\x8a\xb4"),
+               ENTITY_DEF("rotimes", 10805, "\xe2\xa8\xb5"),
+               ENTITY_DEF("qfr", 120110, "\xf0\x9d\x94\xae"),
+               ENTITY_DEF("gtcc", 10919, "\xe2\xaa\xa7"),
+               ENTITY_DEF("fnof", 402, "\xc6\x92"),
+               ENTITY_DEF("tritime", 10811, "\xe2\xa8\xbb"),
+               ENTITY_DEF("andslope", 10840, "\xe2\xa9\x98"),
+               ENTITY_DEF("harrw", 8621, "\xe2\x86\xad"),
+               ENTITY_DEF("NotSquareSuperset", 8848, "\xe2\x8a\x90\xcc\xb8"),
+               ENTITY_DEF("Amacr", 256, "\xc4\x80"),
+               ENTITY_DEF("OpenCurlyDoubleQuote", 8220, "\xe2\x80\x9c"),
+               ENTITY_DEF("thorn", 254, "\xc3\xbe"),
+               ENTITY_DEF("ordf", 170, "\xc2\xaa"),
+               ENTITY_DEF("natur", 9838, "\xe2\x99\xae"),
+               ENTITY_DEF("xi", 958, "\xce\xbe"),
+               ENTITY_DEF("infin", 8734, "\xe2\x88\x9e"),
+               ENTITY_DEF("nspar", 8742, "\xe2\x88\xa6"),
+               ENTITY_DEF("Jcy", 1049, "\xd0\x99"),
+               ENTITY_DEF("DownLeftTeeVector", 10590, "\xe2\xa5\x9e"),
+               ENTITY_DEF("rbarr", 10509, "\xe2\xa4\x8d"),
+               ENTITY_DEF("Xi", 926, "\xce\x9e"),
+               ENTITY_DEF("bull", 8226, "\xe2\x80\xa2"),
+               ENTITY_DEF("cuesc", 8927, "\xe2\x8b\x9f"),
+               ENTITY_DEF("backcong", 8780, "\xe2\x89\x8c"),
+               ENTITY_DEF("frac35", 8535, "\xe2\x85\x97"),
+               ENTITY_DEF("hscr", 119997, "\xf0\x9d\x92\xbd"),
+               ENTITY_DEF("LessEqualGreater", 8922, "\xe2\x8b\x9a"),
+               ENTITY_DEF("Implies", 8658, "\xe2\x87\x92"),
+               ENTITY_DEF("ETH", 208, "\xc3\x90"),
+               ENTITY_DEF("Yacute", 221, "\xc3\x9d"),
+               ENTITY_DEF("shy", 173, "\xc2\xad"),
+               ENTITY_DEF("Rarrtl", 10518, "\xe2\xa4\x96"),
+               ENTITY_DEF("sup1", 185, "\xc2\xb9"),
+               ENTITY_DEF("reals", 8477, "\xe2\x84\x9d"),
+               ENTITY_DEF("blacklozenge", 10731, "\xe2\xa7\xab"),
+               ENTITY_DEF("ncedil", 326, "\xc5\x86"),
+               ENTITY_DEF("Lambda", 923, "\xce\x9b"),
+               ENTITY_DEF("uopf", 120166, "\xf0\x9d\x95\xa6"),
+               ENTITY_DEF("bigodot", 10752, "\xe2\xa8\x80"),
+               ENTITY_DEF("ubreve", 365, "\xc5\xad"),
+               ENTITY_DEF("drbkarow", 10512, "\xe2\xa4\x90"),
+               ENTITY_DEF("els", 10901, "\xe2\xaa\x95"),
+               ENTITY_DEF("shortparallel", 8741, "\xe2\x88\xa5"),
+               ENTITY_DEF("Pcy", 1055, "\xd0\x9f"),
+               ENTITY_DEF("dsol", 10742, "\xe2\xa7\xb6"),
+               ENTITY_DEF("supsim", 10952, "\xe2\xab\x88"),
+               ENTITY_DEF("Longrightarrow", 10233, "\xe2\x9f\xb9"),
+               ENTITY_DEF("ThickSpace", 8287, "\xe2\x81\x9f\xe2\x80\x8a"),
+               ENTITY_DEF("Itilde", 296, "\xc4\xa8"),
+               ENTITY_DEF("nparallel", 8742, "\xe2\x88\xa6"),
+               ENTITY_DEF("And", 10835, "\xe2\xa9\x93"),
+               ENTITY_DEF("boxhd", 9516, "\xe2\x94\xac"),
+               ENTITY_DEF("Dashv", 10980, "\xe2\xab\xa4"),
+               ENTITY_DEF("NotSuperset", 8835, "\xe2\x8a\x83\xe2\x83\x92"),
+               ENTITY_DEF("Eta", 919, "\xce\x97"),
+               ENTITY_DEF("Qopf", 8474, "\xe2\x84\x9a"),
+               ENTITY_DEF("period", 46, "\x2e"),
+               ENTITY_DEF("angmsd", 8737, "\xe2\x88\xa1"),
+               ENTITY_DEF("fllig", 64258, "\xef\xac\x82"),
+               ENTITY_DEF("cuvee", 8910, "\xe2\x8b\x8e"),
+               ENTITY_DEF("wedbar", 10847, "\xe2\xa9\x9f"),
+               ENTITY_DEF("Fscr", 8497, "\xe2\x84\xb1"),
+               ENTITY_DEF("veebar", 8891, "\xe2\x8a\xbb"),
+               ENTITY_DEF("Longleftrightarrow", 10234, "\xe2\x9f\xba"),
+               ENTITY_DEF("reg", 174, "\xc2\xae"),
+               ENTITY_DEF("NegativeMediumSpace", 8203, "\xe2\x80\x8b"),
+               ENTITY_DEF("Upsi", 978, "\xcf\x92"),
+               ENTITY_DEF("Mellintrf", 8499, "\xe2\x84\xb3"),
+               ENTITY_DEF("boxHU", 9577, "\xe2\x95\xa9"),
+               ENTITY_DEF("frac56", 8538, "\xe2\x85\x9a"),
+               ENTITY_DEF("utrif", 9652, "\xe2\x96\xb4"),
+               ENTITY_DEF("LeftTriangle", 8882, "\xe2\x8a\xb2"),
+               ENTITY_DEF("nsime", 8772, "\xe2\x89\x84"),
+               ENTITY_DEF("rcedil", 343, "\xc5\x97"),
+               ENTITY_DEF("aogon", 261, "\xc4\x85"),
+               ENTITY_DEF("uHar", 10595, "\xe2\xa5\xa3"),
+               ENTITY_DEF("ForAll", 8704, "\xe2\x88\x80"),
+               ENTITY_DEF("prE", 10931, "\xe2\xaa\xb3"),
+               ENTITY_DEF("boxV", 9553, "\xe2\x95\x91"),
+               ENTITY_DEF("softcy", 1100, "\xd1\x8c"),
+               ENTITY_DEF("hercon", 8889, "\xe2\x8a\xb9"),
+               ENTITY_DEF("lmoustache", 9136, "\xe2\x8e\xb0"),
+               ENTITY_DEF("Product", 8719, "\xe2\x88\x8f"),
+               ENTITY_DEF("lsimg", 10895, "\xe2\xaa\x8f"),
+               ENTITY_DEF("verbar", 124, "\x7c"),
+               ENTITY_DEF("ofcir", 10687, "\xe2\xa6\xbf"),
+               ENTITY_DEF("curlyeqprec", 8926, "\xe2\x8b\x9e"),
+               ENTITY_DEF("ldquo", 8220, "\xe2\x80\x9c"),
+               ENTITY_DEF("bot", 8869, "\xe2\x8a\xa5"),
+               ENTITY_DEF("Psi", 936, "\xce\xa8"),
+               ENTITY_DEF("OElig", 338, "\xc5\x92"),
+               ENTITY_DEF("DownRightVectorBar", 10583, "\xe2\xa5\x97"),
+               ENTITY_DEF("minusb", 8863, "\xe2\x8a\x9f"),
+               ENTITY_DEF("Iscr", 8464, "\xe2\x84\x90"),
+               ENTITY_DEF("Tcedil", 354, "\xc5\xa2"),
+               ENTITY_DEF("ffilig", 64259, "\xef\xac\x83"),
+               ENTITY_DEF("Gcy", 1043, "\xd0\x93"),
+               ENTITY_DEF("oline", 8254, "\xe2\x80\xbe"),
+               ENTITY_DEF("bottom", 8869, "\xe2\x8a\xa5"),
+               ENTITY_DEF("nVDash", 8879, "\xe2\x8a\xaf"),
+               ENTITY_DEF("lessdot", 8918, "\xe2\x8b\x96"),
+               ENTITY_DEF("cups", 8746, "\xe2\x88\xaa\xef\xb8\x80"),
+               ENTITY_DEF("gla", 10917, "\xe2\xaa\xa5"),
+               ENTITY_DEF("hellip", 8230, "\xe2\x80\xa6"),
+               ENTITY_DEF("hookleftarrow", 8617, "\xe2\x86\xa9"),
+               ENTITY_DEF("Cup", 8915, "\xe2\x8b\x93"),
+               ENTITY_DEF("upsi", 965, "\xcf\x85"),
+               ENTITY_DEF("DownArrowBar", 10515, "\xe2\xa4\x93"),
+               ENTITY_DEF("lowast", 8727, "\xe2\x88\x97"),
+               ENTITY_DEF("profline", 8978, "\xe2\x8c\x92"),
+               ENTITY_DEF("ngsim", 8821, "\xe2\x89\xb5"),
+               ENTITY_DEF("boxhu", 9524, "\xe2\x94\xb4"),
+               ENTITY_DEF("operp", 10681, "\xe2\xa6\xb9"),
+               ENTITY_DEF("cap", 8745, "\xe2\x88\xa9"),
+               ENTITY_DEF("Hcirc", 292, "\xc4\xa4"),
+               ENTITY_DEF("Ncy", 1053, "\xd0\x9d"),
+               ENTITY_DEF("zeetrf", 8488, "\xe2\x84\xa8"),
+               ENTITY_DEF("cuepr", 8926, "\xe2\x8b\x9e"),
+               ENTITY_DEF("supsetneq", 8843, "\xe2\x8a\x8b"),
+               ENTITY_DEF("lfloor", 8970, "\xe2\x8c\x8a"),
+               ENTITY_DEF("ngtr", 8815, "\xe2\x89\xaf"),
+               ENTITY_DEF("ccups", 10828, "\xe2\xa9\x8c"),
+               ENTITY_DEF("pscr", 120005, "\xf0\x9d\x93\x85"),
+               ENTITY_DEF("Cfr", 8493, "\xe2\x84\xad"),
+               ENTITY_DEF("dtri", 9663, "\xe2\x96\xbf"),
+               ENTITY_DEF("icirc", 238, "\xc3\xae"),
+               ENTITY_DEF("leftarrow", 8592, "\xe2\x86\x90"),
+               ENTITY_DEF("vdash", 8866, "\xe2\x8a\xa2"),
+               ENTITY_DEF("leftrightharpoons", 8651, "\xe2\x87\x8b"),
+               ENTITY_DEF("rightrightarrows", 8649, "\xe2\x87\x89"),
+               ENTITY_DEF("strns", 175, "\xc2\xaf"),
+               ENTITY_DEF("intlarhk", 10775, "\xe2\xa8\x97"),
+               ENTITY_DEF("downharpoonright", 8642, "\xe2\x87\x82"),
+               ENTITY_DEF("yacute", 253, "\xc3\xbd"),
+               ENTITY_DEF("boxUr", 9561, "\xe2\x95\x99"),
+               ENTITY_DEF("triangleleft", 9667, "\xe2\x97\x83"),
+               ENTITY_DEF("DiacriticalDot", 729, "\xcb\x99"),
+               ENTITY_DEF("thetav", 977, "\xcf\x91"),
+               ENTITY_DEF("OverBracket", 9140, "\xe2\x8e\xb4"),
+               ENTITY_DEF("PrecedesTilde", 8830, "\xe2\x89\xbe"),
+               ENTITY_DEF("rtrie", 8885, "\xe2\x8a\xb5"),
+               ENTITY_DEF("Scirc", 348, "\xc5\x9c"),
+               ENTITY_DEF("vsupne", 8843, "\xe2\x8a\x8b\xef\xb8\x80"),
+               ENTITY_DEF("OverBrace", 9182, "\xe2\x8f\x9e"),
+               ENTITY_DEF("Yfr", 120092, "\xf0\x9d\x94\x9c"),
+               ENTITY_DEF("scnE", 10934, "\xe2\xaa\xb6"),
+               ENTITY_DEF("simlE", 10911, "\xe2\xaa\x9f"),
+               ENTITY_DEF("Proportional", 8733, "\xe2\x88\x9d"),
+               ENTITY_DEF("edot", 279, "\xc4\x97"),
+               ENTITY_DEF("loang", 10220, "\xe2\x9f\xac"),
+               ENTITY_DEF("gesdot", 10880, "\xe2\xaa\x80"),
+               ENTITY_DEF("DownBreve", 785, "\xcc\x91"),
+               ENTITY_DEF("pcy", 1087, "\xd0\xbf"),
+               ENTITY_DEF("Succeeds", 8827, "\xe2\x89\xbb"),
+               ENTITY_DEF("mfr", 120106, "\xf0\x9d\x94\xaa"),
+               ENTITY_DEF("Leftarrow", 8656, "\xe2\x87\x90"),
+               ENTITY_DEF("boxDr", 9555, "\xe2\x95\x93"),
+               ENTITY_DEF("Nscr", 119977, "\xf0\x9d\x92\xa9"),
+               ENTITY_DEF("diam", 8900, "\xe2\x8b\x84"),
+               ENTITY_DEF("CHcy", 1063, "\xd0\xa7"),
+               ENTITY_DEF("boxdr", 9484, "\xe2\x94\x8c"),
+               ENTITY_DEF("rlm", 8207, "\xe2\x80\x8f"),
+               ENTITY_DEF("Coproduct", 8720, "\xe2\x88\x90"),
+               ENTITY_DEF("RightTeeArrow", 8614, "\xe2\x86\xa6"),
+               ENTITY_DEF("tridot", 9708, "\xe2\x97\xac"),
+               ENTITY_DEF("ldquor", 8222, "\xe2\x80\x9e"),
+               ENTITY_DEF("sol", 47, "\x2f"),
+               ENTITY_DEF("ecirc", 234, "\xc3\xaa"),
+               ENTITY_DEF("DoubleLeftArrow", 8656, "\xe2\x87\x90"),
+               ENTITY_DEF("Gscr", 119970, "\xf0\x9d\x92\xa2"),
+               ENTITY_DEF("ap", 8776, "\xe2\x89\x88"),
+               ENTITY_DEF("rbrke", 10636, "\xe2\xa6\x8c"),
+               ENTITY_DEF("LeftFloor", 8970, "\xe2\x8c\x8a"),
+               ENTITY_DEF("blk12", 9618, "\xe2\x96\x92"),
+               ENTITY_DEF("Conint", 8751, "\xe2\x88\xaf"),
+               ENTITY_DEF("triangledown", 9663, "\xe2\x96\xbf"),
+               ENTITY_DEF("Icy", 1048, "\xd0\x98"),
+               ENTITY_DEF("backprime", 8245, "\xe2\x80\xb5"),
+               ENTITY_DEF("longleftrightarrow", 10231, "\xe2\x9f\xb7"),
+               ENTITY_DEF("ntriangleleft", 8938, "\xe2\x8b\xaa"),
+               ENTITY_DEF("copy", 169, "\xc2\xa9"),
+               ENTITY_DEF("mapstodown", 8615, "\xe2\x86\xa7"),
+               ENTITY_DEF("seArr", 8664, "\xe2\x87\x98"),
+               ENTITY_DEF("ENG", 330, "\xc5\x8a"),
+               ENTITY_DEF("DoubleRightArrow", 8658, "\xe2\x87\x92"),
+               ENTITY_DEF("tfr", 120113, "\xf0\x9d\x94\xb1"),
+               ENTITY_DEF("rharul", 10604, "\xe2\xa5\xac"),
+               ENTITY_DEF("bfr", 120095, "\xf0\x9d\x94\x9f"),
+               ENTITY_DEF("origof", 8886, "\xe2\x8a\xb6"),
+               ENTITY_DEF("Therefore", 8756, "\xe2\x88\xb4"),
+               ENTITY_DEF("glE", 10898, "\xe2\xaa\x92"),
+               ENTITY_DEF("leftarrowtail", 8610, "\xe2\x86\xa2"),
+               ENTITY_DEF("NotEqual", 8800, "\xe2\x89\xa0"),
+               ENTITY_DEF("LeftCeiling", 8968, "\xe2\x8c\x88"),
+               ENTITY_DEF("lArr", 8656, "\xe2\x87\x90"),
+               ENTITY_DEF("subseteq", 8838, "\xe2\x8a\x86"),
+               ENTITY_DEF("larrbfs", 10527, "\xe2\xa4\x9f"),
+               ENTITY_DEF("Gammad", 988, "\xcf\x9c"),
+               ENTITY_DEF("rtriltri", 10702, "\xe2\xa7\x8e"),
+               ENTITY_DEF("Fcy", 1060, "\xd0\xa4"),
+               ENTITY_DEF("Vopf", 120141, "\xf0\x9d\x95\x8d"),
+               ENTITY_DEF("lrarr", 8646, "\xe2\x87\x86"),
+               ENTITY_DEF("delta", 948, "\xce\xb4"),
+               ENTITY_DEF("xodot", 10752, "\xe2\xa8\x80"),
+               ENTITY_DEF("larrtl", 8610, "\xe2\x86\xa2"),
+               ENTITY_DEF("gsim", 8819, "\xe2\x89\xb3"),
+               ENTITY_DEF("ratail", 10522, "\xe2\xa4\x9a"),
+               ENTITY_DEF("vsubne", 8842, "\xe2\x8a\x8a\xef\xb8\x80"),
+               ENTITY_DEF("boxur", 9492, "\xe2\x94\x94"),
+               ENTITY_DEF("succsim", 8831, "\xe2\x89\xbf"),
+               ENTITY_DEF("triplus", 10809, "\xe2\xa8\xb9"),
+               ENTITY_DEF("nless", 8814, "\xe2\x89\xae"),
+               ENTITY_DEF("uharr", 8638, "\xe2\x86\xbe"),
+               ENTITY_DEF("lambda", 955, "\xce\xbb"),
+               ENTITY_DEF("uuml", 252, "\xc3\xbc"),
+               ENTITY_DEF("horbar", 8213, "\xe2\x80\x95"),
+               ENTITY_DEF("ccirc", 265, "\xc4\x89"),
+               ENTITY_DEF("sqcup", 8852, "\xe2\x8a\x94"),
+               ENTITY_DEF("Pscr", 119979, "\xf0\x9d\x92\xab"),
+               ENTITY_DEF("supsup", 10966, "\xe2\xab\x96"),
+               ENTITY_DEF("Cacute", 262, "\xc4\x86"),
+               ENTITY_DEF("upsih", 978, "\xcf\x92"),
+               ENTITY_DEF("precsim", 8830, "\xe2\x89\xbe"),
+               ENTITY_DEF("longrightarrow", 10230, "\xe2\x9f\xb6"),
+               ENTITY_DEF("circledR", 174, "\xc2\xae"),
+               ENTITY_DEF("UpTeeArrow", 8613, "\xe2\x86\xa5"),
+               ENTITY_DEF("bepsi", 1014, "\xcf\xb6"),
+               ENTITY_DEF("oast", 8859, "\xe2\x8a\x9b"),
+               ENTITY_DEF("yfr", 120118, "\xf0\x9d\x94\xb6"),
+               ENTITY_DEF("rdsh", 8627, "\xe2\x86\xb3"),
+               ENTITY_DEF("Ograve", 210, "\xc3\x92"),
+               ENTITY_DEF("LeftVectorBar", 10578, "\xe2\xa5\x92"),
+               ENTITY_DEF("NotNestedLessLess", 10913, "\xe2\xaa\xa1\xcc\xb8"),
+               ENTITY_DEF("Jscr", 119973, "\xf0\x9d\x92\xa5"),
+               ENTITY_DEF("psi", 968, "\xcf\x88"),
+               ENTITY_DEF("orarr", 8635, "\xe2\x86\xbb"),
+               ENTITY_DEF("Subset", 8912, "\xe2\x8b\x90"),
+               ENTITY_DEF("curarr", 8631, "\xe2\x86\xb7"),
+               ENTITY_DEF("CirclePlus", 8853, "\xe2\x8a\x95"),
+               ENTITY_DEF("gtrless", 8823, "\xe2\x89\xb7"),
+               ENTITY_DEF("nvle", 8804, "\xe2\x89\xa4\xe2\x83\x92"),
+               ENTITY_DEF("prop", 8733, "\xe2\x88\x9d"),
+               ENTITY_DEF("gEl", 10892, "\xe2\xaa\x8c"),
+               ENTITY_DEF("gtlPar", 10645, "\xe2\xa6\x95"),
+               ENTITY_DEF("frasl", 8260, "\xe2\x81\x84"),
+               ENTITY_DEF("nearr", 8599, "\xe2\x86\x97"),
+               ENTITY_DEF("NotSubsetEqual", 8840, "\xe2\x8a\x88"),
+               ENTITY_DEF("planck", 8463, "\xe2\x84\x8f"),
+               ENTITY_DEF("Uuml", 220, "\xc3\x9c"),
+               ENTITY_DEF("spadesuit", 9824, "\xe2\x99\xa0"),
+               ENTITY_DEF("sect", 167, "\xc2\xa7"),
+               ENTITY_DEF("cdot", 267, "\xc4\x8b"),
+               ENTITY_DEF("boxVh", 9579, "\xe2\x95\xab"),
+               ENTITY_DEF("zscr", 120015, "\xf0\x9d\x93\x8f"),
+               ENTITY_DEF("nsqsube", 8930, "\xe2\x8b\xa2"),
+               ENTITY_DEF("grave", 96, "\x60"),
+               ENTITY_DEF("angrtvb", 8894, "\xe2\x8a\xbe"),
+               ENTITY_DEF("MediumSpace", 8287, "\xe2\x81\x9f"),
+               ENTITY_DEF("Ntilde", 209, "\xc3\x91"),
+               ENTITY_DEF("solb", 10692, "\xe2\xa7\x84"),
+               ENTITY_DEF("angzarr", 9084, "\xe2\x8d\xbc"),
+               ENTITY_DEF("nopf", 120159, "\xf0\x9d\x95\x9f"),
+               ENTITY_DEF("rtrif", 9656, "\xe2\x96\xb8"),
+               ENTITY_DEF("nrightarrow", 8603, "\xe2\x86\x9b"),
+               ENTITY_DEF("Kappa", 922, "\xce\x9a"),
+               ENTITY_DEF("simrarr", 10610, "\xe2\xa5\xb2"),
+               ENTITY_DEF("imacr", 299, "\xc4\xab"),
+               ENTITY_DEF("vrtri", 8883, "\xe2\x8a\xb3"),
+               ENTITY_DEF("part", 8706, "\xe2\x88\x82"),
+               ENTITY_DEF("esim", 8770, "\xe2\x89\x82"),
+               ENTITY_DEF("atilde", 227, "\xc3\xa3"),
+               ENTITY_DEF("DownRightTeeVector", 10591, "\xe2\xa5\x9f"),
+               ENTITY_DEF("jcirc", 309, "\xc4\xb5"),
+               ENTITY_DEF("Ecaron", 282, "\xc4\x9a"),
+               ENTITY_DEF("VerticalSeparator", 10072, "\xe2\x9d\x98"),
+               ENTITY_DEF("rHar", 10596, "\xe2\xa5\xa4"),
+               ENTITY_DEF("rcaron", 345, "\xc5\x99"),
+               ENTITY_DEF("subnE", 10955, "\xe2\xab\x8b"),
+               ENTITY_DEF("ii", 8520, "\xe2\x85\x88"),
+               ENTITY_DEF("Cconint", 8752, "\xe2\x88\xb0"),
+               ENTITY_DEF("Mcy", 1052, "\xd0\x9c"),
+               ENTITY_DEF("eqcolon", 8789, "\xe2\x89\x95"),
+               ENTITY_DEF("cupor", 10821, "\xe2\xa9\x85"),
+               ENTITY_DEF("DoubleUpArrow", 8657, "\xe2\x87\x91"),
+               ENTITY_DEF("boxbox", 10697, "\xe2\xa7\x89"),
+               ENTITY_DEF("setminus", 8726, "\xe2\x88\x96"),
+               ENTITY_DEF("Lleftarrow", 8666, "\xe2\x87\x9a"),
+               ENTITY_DEF("nang", 8736, "\xe2\x88\xa0\xe2\x83\x92"),
+               ENTITY_DEF("TRADE", 8482, "\xe2\x84\xa2"),
+               ENTITY_DEF("urcorner", 8989, "\xe2\x8c\x9d"),
+               ENTITY_DEF("lsqb", 91, "\x5b"),
+               ENTITY_DEF("cupcup", 10826, "\xe2\xa9\x8a"),
+               ENTITY_DEF("kjcy", 1116, "\xd1\x9c"),
+               ENTITY_DEF("llhard", 10603, "\xe2\xa5\xab"),
+               ENTITY_DEF("mumap", 8888, "\xe2\x8a\xb8"),
+               ENTITY_DEF("iiint", 8749, "\xe2\x88\xad"),
+               ENTITY_DEF("RightTee", 8866, "\xe2\x8a\xa2"),
+               ENTITY_DEF("Tcaron", 356, "\xc5\xa4"),
+               ENTITY_DEF("bigcirc", 9711, "\xe2\x97\xaf"),
+               ENTITY_DEF("trianglerighteq", 8885, "\xe2\x8a\xb5"),
+               ENTITY_DEF("NotLessGreater", 8824, "\xe2\x89\xb8"),
+               ENTITY_DEF("hArr", 8660, "\xe2\x87\x94"),
+               ENTITY_DEF("ocy", 1086, "\xd0\xbe"),
+               ENTITY_DEF("tosa", 10537, "\xe2\xa4\xa9"),
+               ENTITY_DEF("twixt", 8812, "\xe2\x89\xac"),
+               ENTITY_DEF("square", 9633, "\xe2\x96\xa1"),
+               ENTITY_DEF("Otimes", 10807, "\xe2\xa8\xb7"),
+               ENTITY_DEF("Kcedil", 310, "\xc4\xb6"),
+               ENTITY_DEF("beth", 8502, "\xe2\x84\xb6"),
+               ENTITY_DEF("triminus", 10810, "\xe2\xa8\xba"),
+               ENTITY_DEF("nlArr", 8653, "\xe2\x87\x8d"),
+               ENTITY_DEF("Oacute", 211, "\xc3\x93"),
+               ENTITY_DEF("zwnj", 8204, "\xe2\x80\x8c"),
+               ENTITY_DEF("ll", 8810, "\xe2\x89\xaa"),
+               ENTITY_DEF("smashp", 10803, "\xe2\xa8\xb3"),
+               ENTITY_DEF("ngeqq", 8807, "\xe2\x89\xa7\xcc\xb8"),
+               ENTITY_DEF("rnmid", 10990, "\xe2\xab\xae"),
+               ENTITY_DEF("nwArr", 8662, "\xe2\x87\x96"),
+               ENTITY_DEF("RightUpDownVector", 10575, "\xe2\xa5\x8f"),
+               ENTITY_DEF("lbbrk", 10098, "\xe2\x9d\xb2"),
+               ENTITY_DEF("compfn", 8728, "\xe2\x88\x98"),
+               ENTITY_DEF("eDDot", 10871, "\xe2\xa9\xb7"),
+               ENTITY_DEF("Jsercy", 1032, "\xd0\x88"),
+               ENTITY_DEF("HARDcy", 1066, "\xd0\xaa"),
+               ENTITY_DEF("nexists", 8708, "\xe2\x88\x84"),
+               ENTITY_DEF("theta", 952, "\xce\xb8"),
+               ENTITY_DEF("plankv", 8463, "\xe2\x84\x8f"),
+               ENTITY_DEF("sup2", 178, "\xc2\xb2"),
+               ENTITY_DEF("lessapprox", 10885, "\xe2\xaa\x85"),
+               ENTITY_DEF("gdot", 289, "\xc4\xa1"),
+               ENTITY_DEF("angmsdae", 10668, "\xe2\xa6\xac"),
+               ENTITY_DEF("Superset", 8835, "\xe2\x8a\x83"),
+               ENTITY_DEF("prap", 10935, "\xe2\xaa\xb7"),
+               ENTITY_DEF("Zscr", 119989, "\xf0\x9d\x92\xb5"),
+               ENTITY_DEF("nsucc", 8833, "\xe2\x8a\x81"),
+               ENTITY_DEF("supseteqq", 10950, "\xe2\xab\x86"),
+               ENTITY_DEF("UpTee", 8869, "\xe2\x8a\xa5"),
+               ENTITY_DEF("LowerLeftArrow", 8601, "\xe2\x86\x99"),
+               ENTITY_DEF("ssmile", 8995, "\xe2\x8c\xa3"),
+               ENTITY_DEF("niv", 8715, "\xe2\x88\x8b"),
+               ENTITY_DEF("bigvee", 8897, "\xe2\x8b\x81"),
+               ENTITY_DEF("kscr", 120000, "\xf0\x9d\x93\x80"),
+               ENTITY_DEF("xutri", 9651, "\xe2\x96\xb3"),
+               ENTITY_DEF("caret", 8257, "\xe2\x81\x81"),
+               ENTITY_DEF("caron", 711, "\xcb\x87"),
+               ENTITY_DEF("Wedge", 8896, "\xe2\x8b\x80"),
+               ENTITY_DEF("sdotb", 8865, "\xe2\x8a\xa1"),
+               ENTITY_DEF("bigoplus", 10753, "\xe2\xa8\x81"),
+               ENTITY_DEF("Breve", 728, "\xcb\x98"),
+               ENTITY_DEF("ImaginaryI", 8520, "\xe2\x85\x88"),
+               ENTITY_DEF("longmapsto", 10236, "\xe2\x9f\xbc"),
+               ENTITY_DEF("boxVH", 9580, "\xe2\x95\xac"),
+               ENTITY_DEF("lozenge", 9674, "\xe2\x97\x8a"),
+               ENTITY_DEF("toea", 10536, "\xe2\xa4\xa8"),
+               ENTITY_DEF("nbumpe", 8783, "\xe2\x89\x8f\xcc\xb8"),
+               ENTITY_DEF("gcirc", 285, "\xc4\x9d"),
+               ENTITY_DEF("NotHumpEqual", 8783, "\xe2\x89\x8f\xcc\xb8"),
+               ENTITY_DEF("pre", 10927, "\xe2\xaa\xaf"),
+               ENTITY_DEF("ascr", 119990, "\xf0\x9d\x92\xb6"),
+               ENTITY_DEF("Acirc", 194, "\xc3\x82"),
+               ENTITY_DEF("questeq", 8799, "\xe2\x89\x9f"),
+               ENTITY_DEF("ncaron", 328, "\xc5\x88"),
+               ENTITY_DEF("LeftTeeArrow", 8612, "\xe2\x86\xa4"),
+               ENTITY_DEF("xcirc", 9711, "\xe2\x97\xaf"),
+               ENTITY_DEF("swarr", 8601, "\xe2\x86\x99"),
+               ENTITY_DEF("MinusPlus", 8723, "\xe2\x88\x93"),
+               ENTITY_DEF("plus", 43, "\x2b"),
+               ENTITY_DEF("NotDoubleVerticalBar", 8742, "\xe2\x88\xa6"),
+               ENTITY_DEF("rppolint", 10770, "\xe2\xa8\x92"),
+               ENTITY_DEF("NotTildeFullEqual", 8775, "\xe2\x89\x87"),
+               ENTITY_DEF("ltdot", 8918, "\xe2\x8b\x96"),
+               ENTITY_DEF("NotNestedGreaterGreater", 10914, "\xe2\xaa\xa2\xcc\xb8"),
+               ENTITY_DEF("Lscr", 8466, "\xe2\x84\x92"),
+               ENTITY_DEF("pitchfork", 8916, "\xe2\x8b\x94"),
+               ENTITY_DEF("Eopf", 120124, "\xf0\x9d\x94\xbc"),
+               ENTITY_DEF("ropf", 120163, "\xf0\x9d\x95\xa3"),
+               ENTITY_DEF("Delta", 916, "\xce\x94"),
+               ENTITY_DEF("lozf", 10731, "\xe2\xa7\xab"),
+               ENTITY_DEF("RightTeeVector", 10587, "\xe2\xa5\x9b"),
+               ENTITY_DEF("UpDownArrow", 8597, "\xe2\x86\x95"),
+               ENTITY_DEF("bump", 8782, "\xe2\x89\x8e"),
+               ENTITY_DEF("Rscr", 8475, "\xe2\x84\x9b"),
+               ENTITY_DEF("slarr", 8592, "\xe2\x86\x90"),
+               ENTITY_DEF("lcy", 1083, "\xd0\xbb"),
+               ENTITY_DEF("Vee", 8897, "\xe2\x8b\x81"),
+               ENTITY_DEF("Iogon", 302, "\xc4\xae"),
+               ENTITY_DEF("minus", 8722, "\xe2\x88\x92"),
+               ENTITY_DEF("GreaterFullEqual", 8807, "\xe2\x89\xa7"),
+               ENTITY_DEF("xhArr", 10234, "\xe2\x9f\xba"),
+               ENTITY_DEF("shortmid", 8739, "\xe2\x88\xa3"),
+               ENTITY_DEF("DoubleDownArrow", 8659, "\xe2\x87\x93"),
+               ENTITY_DEF("Wscr", 119986, "\xf0\x9d\x92\xb2"),
+               ENTITY_DEF("rang", 10217, "\xe2\x9f\xa9"),
+               ENTITY_DEF("lcub", 123, "\x7b"),
+               ENTITY_DEF("mnplus", 8723, "\xe2\x88\x93"),
+               ENTITY_DEF("ulcrop", 8975, "\xe2\x8c\x8f"),
+               ENTITY_DEF("wfr", 120116, "\xf0\x9d\x94\xb4"),
+               ENTITY_DEF("DifferentialD", 8518, "\xe2\x85\x86"),
+               ENTITY_DEF("ThinSpace", 8201, "\xe2\x80\x89"),
+               ENTITY_DEF("NotGreaterGreater", 8811, "\xe2\x89\xab\xcc\xb8"),
+               ENTITY_DEF("Topf", 120139, "\xf0\x9d\x95\x8b"),
+               ENTITY_DEF("sbquo", 8218, "\xe2\x80\x9a"),
+               ENTITY_DEF("sdot", 8901, "\xe2\x8b\x85"),
+               ENTITY_DEF("DoubleLeftTee", 10980, "\xe2\xab\xa4"),
+               ENTITY_DEF("vBarv", 10985, "\xe2\xab\xa9"),
+               ENTITY_DEF("subne", 8842, "\xe2\x8a\x8a"),
+               ENTITY_DEF("gtrdot", 8919, "\xe2\x8b\x97"),
+               ENTITY_DEF("opar", 10679, "\xe2\xa6\xb7"),
+               ENTITY_DEF("apid", 8779, "\xe2\x89\x8b"),
+               ENTITY_DEF("Cross", 10799, "\xe2\xa8\xaf"),
+               ENTITY_DEF("lhblk", 9604, "\xe2\x96\x84"),
+               ENTITY_DEF("capcap", 10827, "\xe2\xa9\x8b"),
+               ENTITY_DEF("midast", 42, "\x2a"),
+               ENTITY_DEF("lscr", 120001, "\xf0\x9d\x93\x81"),
+               ENTITY_DEF("nGt", 8811, "\xe2\x89\xab\xe2\x83\x92"),
+               ENTITY_DEF("Euml", 203, "\xc3\x8b"),
+               ENTITY_DEF("blacktriangledown", 9662, "\xe2\x96\xbe"),
+               ENTITY_DEF("Rcy", 1056, "\xd0\xa0"),
+               ENTITY_DEF("dfisht", 10623, "\xe2\xa5\xbf"),
+               ENTITY_DEF("dashv", 8867, "\xe2\x8a\xa3"),
+               ENTITY_DEF("ast", 42, "\x2a"),
+               ENTITY_DEF("ContourIntegral", 8750, "\xe2\x88\xae"),
+               ENTITY_DEF("Ofr", 120082, "\xf0\x9d\x94\x92"),
+               ENTITY_DEF("Lcy", 1051, "\xd0\x9b"),
+               ENTITY_DEF("nltrie", 8940, "\xe2\x8b\xac"),
+               ENTITY_DEF("ShortUpArrow", 8593, "\xe2\x86\x91"),
+               ENTITY_DEF("acy", 1072, "\xd0\xb0"),
+               ENTITY_DEF("rightarrow", 8594, "\xe2\x86\x92"),
+               ENTITY_DEF("UnderBar", 95, "\x5f"),
+               ENTITY_DEF("LongLeftArrow", 10229, "\xe2\x9f\xb5"),
+               ENTITY_DEF("andd", 10844, "\xe2\xa9\x9c"),
+               ENTITY_DEF("xlarr", 10229, "\xe2\x9f\xb5"),
+               ENTITY_DEF("percnt", 37, "\x25"),
+               ENTITY_DEF("rharu", 8640, "\xe2\x87\x80"),
+               ENTITY_DEF("plusdo", 8724, "\xe2\x88\x94"),
+               ENTITY_DEF("TScy", 1062, "\xd0\xa6"),
+               ENTITY_DEF("kcy", 1082, "\xd0\xba"),
+               ENTITY_DEF("boxVR", 9568, "\xe2\x95\xa0"),
+               ENTITY_DEF("looparrowleft", 8619, "\xe2\x86\xab"),
+               ENTITY_DEF("scirc", 349, "\xc5\x9d"),
+               ENTITY_DEF("drcorn", 8991, "\xe2\x8c\x9f"),
+               ENTITY_DEF("iiota", 8489, "\xe2\x84\xa9"),
+               ENTITY_DEF("Zcy", 1047, "\xd0\x97"),
+               ENTITY_DEF("frac58", 8541, "\xe2\x85\x9d"),
+               ENTITY_DEF("alpha", 945, "\xce\xb1"),
+               ENTITY_DEF("daleth", 8504, "\xe2\x84\xb8"),
+               ENTITY_DEF("gtreqless", 8923, "\xe2\x8b\x9b"),
+               ENTITY_DEF("tstrok", 359, "\xc5\xa7"),
+               ENTITY_DEF("plusb", 8862, "\xe2\x8a\x9e"),
+               ENTITY_DEF("odsold", 10684, "\xe2\xa6\xbc"),
+               ENTITY_DEF("varsupsetneqq", 10956, "\xe2\xab\x8c\xef\xb8\x80"),
+               ENTITY_DEF("otilde", 245, "\xc3\xb5"),
+               ENTITY_DEF("gtcir", 10874, "\xe2\xa9\xba"),
+               ENTITY_DEF("lltri", 9722, "\xe2\x97\xba"),
+               ENTITY_DEF("rx", 8478, "\xe2\x84\x9e"),
+               ENTITY_DEF("ljcy", 1113, "\xd1\x99"),
+               ENTITY_DEF("parsim", 10995, "\xe2\xab\xb3"),
+               ENTITY_DEF("NotElement", 8713, "\xe2\x88\x89"),
+               ENTITY_DEF("plusmn", 177, "\xc2\xb1"),
+               ENTITY_DEF("varsubsetneq", 8842, "\xe2\x8a\x8a\xef\xb8\x80"),
+               ENTITY_DEF("subset", 8834, "\xe2\x8a\x82"),
+               ENTITY_DEF("awint", 10769, "\xe2\xa8\x91"),
+               ENTITY_DEF("laemptyv", 10676, "\xe2\xa6\xb4"),
+               ENTITY_DEF("phiv", 981, "\xcf\x95"),
+               ENTITY_DEF("sfrown", 8994, "\xe2\x8c\xa2"),
+               ENTITY_DEF("DoubleUpDownArrow", 8661, "\xe2\x87\x95"),
+               ENTITY_DEF("lpar", 40, "\x28"),
+               ENTITY_DEF("frac45", 8536, "\xe2\x85\x98"),
+               ENTITY_DEF("rBarr", 10511, "\xe2\xa4\x8f"),
+               ENTITY_DEF("npolint", 10772, "\xe2\xa8\x94"),
+               ENTITY_DEF("emacr", 275, "\xc4\x93"),
+               ENTITY_DEF("maltese", 10016, "\xe2\x9c\xa0"),
+               ENTITY_DEF("PlusMinus", 177, "\xc2\xb1"),
+               ENTITY_DEF("ReverseEquilibrium", 8651, "\xe2\x87\x8b"),
+               ENTITY_DEF("oscr", 8500, "\xe2\x84\xb4"),
+               ENTITY_DEF("blacksquare", 9642, "\xe2\x96\xaa"),
+               ENTITY_DEF("TSHcy", 1035, "\xd0\x8b"),
+               ENTITY_DEF("gap", 10886, "\xe2\xaa\x86"),
+               ENTITY_DEF("xnis", 8955, "\xe2\x8b\xbb"),
+               ENTITY_DEF("Ll", 8920, "\xe2\x8b\x98"),
+               ENTITY_DEF("PrecedesEqual", 10927, "\xe2\xaa\xaf"),
+               ENTITY_DEF("incare", 8453, "\xe2\x84\x85"),
+               ENTITY_DEF("nharr", 8622, "\xe2\x86\xae"),
+               ENTITY_DEF("varnothing", 8709, "\xe2\x88\x85"),
+               ENTITY_DEF("ShortDownArrow", 8595, "\xe2\x86\x93"),
+               ENTITY_DEF("nbsp", 160, " "),
+               ENTITY_DEF("asympeq", 8781, "\xe2\x89\x8d"),
+               ENTITY_DEF("rbrkslu", 10640, "\xe2\xa6\x90"),
+               ENTITY_DEF("rho", 961, "\xcf\x81"),
+               ENTITY_DEF("Mscr", 8499, "\xe2\x84\xb3"),
+               ENTITY_DEF("eth", 240, "\xc3\xb0"),
+               ENTITY_DEF("suplarr", 10619, "\xe2\xa5\xbb"),
+               ENTITY_DEF("Tab", 9, "\x09"),
+               ENTITY_DEF("omicron", 959, "\xce\xbf"),
+               ENTITY_DEF("blacktriangle", 9652, "\xe2\x96\xb4"),
+               ENTITY_DEF("nldr", 8229, "\xe2\x80\xa5"),
+               ENTITY_DEF("downharpoonleft", 8643, "\xe2\x87\x83"),
+               ENTITY_DEF("circledcirc", 8858, "\xe2\x8a\x9a"),
+               ENTITY_DEF("leftleftarrows", 8647, "\xe2\x87\x87"),
+               ENTITY_DEF("NotHumpDownHump", 8782, "\xe2\x89\x8e\xcc\xb8"),
+               ENTITY_DEF("nvgt", 62, "\x3e\xe2\x83\x92"),
+               ENTITY_DEF("rhard", 8641, "\xe2\x87\x81"),
+               ENTITY_DEF("nGg", 8921, "\xe2\x8b\x99\xcc\xb8"),
+               ENTITY_DEF("lurdshar", 10570, "\xe2\xa5\x8a"),
+               ENTITY_DEF("cirE", 10691, "\xe2\xa7\x83"),
+               ENTITY_DEF("isinE", 8953, "\xe2\x8b\xb9"),
+               ENTITY_DEF("eparsl", 10723, "\xe2\xa7\xa3"),
+               ENTITY_DEF("RightAngleBracket", 10217, "\xe2\x9f\xa9"),
+               ENTITY_DEF("hcirc", 293, "\xc4\xa5"),
+               ENTITY_DEF("bumpeq", 8783, "\xe2\x89\x8f"),
+               ENTITY_DEF("cire", 8791, "\xe2\x89\x97"),
+               ENTITY_DEF("dotplus", 8724, "\xe2\x88\x94"),
+               ENTITY_DEF("itilde", 297, "\xc4\xa9"),
+               ENTITY_DEF("uwangle", 10663, "\xe2\xa6\xa7"),
+               ENTITY_DEF("rlhar", 8652, "\xe2\x87\x8c"),
+               ENTITY_DEF("rbrace", 125, "\x7d"),
+               ENTITY_DEF("mid", 8739, "\xe2\x88\xa3"),
+               ENTITY_DEF("el", 10905, "\xe2\xaa\x99"),
+               ENTITY_DEF("KJcy", 1036, "\xd0\x8c"),
+               ENTITY_DEF("odiv", 10808, "\xe2\xa8\xb8"),
+               ENTITY_DEF("amacr", 257, "\xc4\x81"),
+               ENTITY_DEF("qprime", 8279, "\xe2\x81\x97"),
+               ENTITY_DEF("tcedil", 355, "\xc5\xa3"),
+               ENTITY_DEF("UpArrowDownArrow", 8645, "\xe2\x87\x85"),
+               ENTITY_DEF("spades", 9824, "\xe2\x99\xa0"),
+               ENTITY_DEF("napos", 329, "\xc5\x89"),
+               ENTITY_DEF("straightepsilon", 1013, "\xcf\xb5"),
+               ENTITY_DEF("CupCap", 8781, "\xe2\x89\x8d"),
+               ENTITY_DEF("Oopf", 120134, "\xf0\x9d\x95\x86"),
+               ENTITY_DEF("sub", 8834, "\xe2\x8a\x82"),
+               ENTITY_DEF("ohm", 937, "\xce\xa9"),
+               ENTITY_DEF("UnderBrace", 9183, "\xe2\x8f\x9f"),
+               ENTITY_DEF("looparrowright", 8620, "\xe2\x86\xac"),
+               ENTITY_DEF("xotime", 10754, "\xe2\xa8\x82"),
+               ENTITY_DEF("ntgl", 8825, "\xe2\x89\xb9"),
+               ENTITY_DEF("minusdu", 10794, "\xe2\xa8\xaa"),
+               ENTITY_DEF("rarrb", 8677, "\xe2\x87\xa5"),
+               ENTITY_DEF("nvlArr", 10498, "\xe2\xa4\x82"),
+               ENTITY_DEF("triangle", 9653, "\xe2\x96\xb5"),
+               ENTITY_DEF("nacute", 324, "\xc5\x84"),
+               ENTITY_DEF("boxHD", 9574, "\xe2\x95\xa6"),
+               ENTITY_DEF("ratio", 8758, "\xe2\x88\xb6"),
+               ENTITY_DEF("larrsim", 10611, "\xe2\xa5\xb3"),
+               ENTITY_DEF("LessLess", 10913, "\xe2\xaa\xa1"),
+               ENTITY_DEF("yacy", 1103, "\xd1\x8f"),
+               ENTITY_DEF("ctdot", 8943, "\xe2\x8b\xaf"),
+               ENTITY_DEF("and", 8743, "\xe2\x88\xa7"),
+               ENTITY_DEF("lrtri", 8895, "\xe2\x8a\xbf"),
+               ENTITY_DEF("eDot", 8785, "\xe2\x89\x91"),
+               ENTITY_DEF("sqsub", 8847, "\xe2\x8a\x8f"),
+               ENTITY_DEF("real", 8476, "\xe2\x84\x9c"),
+               ENTITY_DEF("Dcy", 1044, "\xd0\x94"),
+               ENTITY_DEF("vartheta", 977, "\xcf\x91"),
+               ENTITY_DEF("nsub", 8836, "\xe2\x8a\x84"),
+               ENTITY_DEF("DownTee", 8868, "\xe2\x8a\xa4"),
+               ENTITY_DEF("acute", 180, "\xc2\xb4"),
+               ENTITY_DEF("GreaterLess", 8823, "\xe2\x89\xb7"),
+               ENTITY_DEF("supplus", 10944, "\xe2\xab\x80"),
+               ENTITY_DEF("Vbar", 10987, "\xe2\xab\xab"),
+               ENTITY_DEF("divideontimes", 8903, "\xe2\x8b\x87"),
+               ENTITY_DEF("lsim", 8818, "\xe2\x89\xb2"),
+               ENTITY_DEF("nearhk", 10532, "\xe2\xa4\xa4"),
+               ENTITY_DEF("nLtv", 8810, "\xe2\x89\xaa\xcc\xb8"),
+               ENTITY_DEF("RuleDelayed", 10740, "\xe2\xa7\xb4"),
+               ENTITY_DEF("smile", 8995, "\xe2\x8c\xa3"),
+               ENTITY_DEF("coprod", 8720, "\xe2\x88\x90"),
+               ENTITY_DEF("imof", 8887, "\xe2\x8a\xb7"),
+               ENTITY_DEF("ecy", 1101, "\xd1\x8d"),
+               ENTITY_DEF("RightCeiling", 8969, "\xe2\x8c\x89"),
+               ENTITY_DEF("dlcorn", 8990, "\xe2\x8c\x9e"),
+               ENTITY_DEF("Nu", 925, "\xce\x9d"),
+               ENTITY_DEF("frac18", 8539, "\xe2\x85\x9b"),
+               ENTITY_DEF("diamond", 8900, "\xe2\x8b\x84"),
+               ENTITY_DEF("Icirc", 206, "\xc3\x8e"),
+               ENTITY_DEF("ngeq", 8817, "\xe2\x89\xb1"),
+               ENTITY_DEF("epsilon", 949, "\xce\xb5"),
+               ENTITY_DEF("fork", 8916, "\xe2\x8b\x94"),
+               ENTITY_DEF("xrarr", 10230, "\xe2\x9f\xb6"),
+               ENTITY_DEF("racute", 341, "\xc5\x95"),
+               ENTITY_DEF("ntlg", 8824, "\xe2\x89\xb8"),
+               ENTITY_DEF("xvee", 8897, "\xe2\x8b\x81"),
+               ENTITY_DEF("LeftArrowRightArrow", 8646, "\xe2\x87\x86"),
+               ENTITY_DEF("DownLeftRightVector", 10576, "\xe2\xa5\x90"),
+               ENTITY_DEF("Eacute", 201, "\xc3\x89"),
+               ENTITY_DEF("gimel", 8503, "\xe2\x84\xb7"),
+               ENTITY_DEF("rtimes", 8906, "\xe2\x8b\x8a"),
+               ENTITY_DEF("forall", 8704, "\xe2\x88\x80"),
+               ENTITY_DEF("DiacriticalDoubleAcute", 733, "\xcb\x9d"),
+               ENTITY_DEF("dArr", 8659, "\xe2\x87\x93"),
+               ENTITY_DEF("fallingdotseq", 8786, "\xe2\x89\x92"),
+               ENTITY_DEF("Aogon", 260, "\xc4\x84"),
+               ENTITY_DEF("PartialD", 8706, "\xe2\x88\x82"),
+               ENTITY_DEF("mapstoup", 8613, "\xe2\x86\xa5"),
+               ENTITY_DEF("die", 168, "\xc2\xa8"),
+               ENTITY_DEF("ngt", 8815, "\xe2\x89\xaf"),
+               ENTITY_DEF("vcy", 1074, "\xd0\xb2"),
+               ENTITY_DEF("fjlig", 0, "\x66\x6a"),
+               ENTITY_DEF("submult", 10945, "\xe2\xab\x81"),
+               ENTITY_DEF("ubrcy", 1118, "\xd1\x9e"),
+               ENTITY_DEF("ovbar", 9021, "\xe2\x8c\xbd"),
+               ENTITY_DEF("bsime", 8909, "\xe2\x8b\x8d"),
+               ENTITY_DEF("precnsim", 8936, "\xe2\x8b\xa8"),
+               ENTITY_DEF("DiacriticalTilde", 732, "\xcb\x9c"),
+               ENTITY_DEF("cwint", 8753, "\xe2\x88\xb1"),
+               ENTITY_DEF("Scy", 1057, "\xd0\xa1"),
+               ENTITY_DEF("NotGreaterEqual", 8817, "\xe2\x89\xb1"),
+               ENTITY_DEF("boxUR", 9562, "\xe2\x95\x9a"),
+               ENTITY_DEF("LessSlantEqual", 10877, "\xe2\xa9\xbd"),
+               ENTITY_DEF("Barwed", 8966, "\xe2\x8c\x86"),
+               ENTITY_DEF("supdot", 10942, "\xe2\xaa\xbe"),
+               ENTITY_DEF("gel", 8923, "\xe2\x8b\x9b"),
+               ENTITY_DEF("iscr", 119998, "\xf0\x9d\x92\xbe"),
+               ENTITY_DEF("doublebarwedge", 8966, "\xe2\x8c\x86"),
+               ENTITY_DEF("Idot", 304, "\xc4\xb0"),
+               ENTITY_DEF("DoubleDot", 168, "\xc2\xa8"),
+               ENTITY_DEF("rsquo", 8217, "\xe2\x80\x99"),
+               ENTITY_DEF("subsetneqq", 10955, "\xe2\xab\x8b"),
+               ENTITY_DEF("UpEquilibrium", 10606, "\xe2\xa5\xae"),
+               ENTITY_DEF("copysr", 8471, "\xe2\x84\x97"),
+               ENTITY_DEF("RightDoubleBracket", 10215, "\xe2\x9f\xa7"),
+               ENTITY_DEF("LeftRightVector", 10574, "\xe2\xa5\x8e"),
+               ENTITY_DEF("DownLeftVectorBar", 10582, "\xe2\xa5\x96"),
+               ENTITY_DEF("suphsub", 10967, "\xe2\xab\x97"),
+               ENTITY_DEF("cedil", 184, "\xc2\xb8"),
+               ENTITY_DEF("prurel", 8880, "\xe2\x8a\xb0"),
+               ENTITY_DEF("imagpart", 8465, "\xe2\x84\x91"),
+               ENTITY_DEF("Hscr", 8459, "\xe2\x84\x8b"),
+               ENTITY_DEF("jmath", 567, "\xc8\xb7"),
+               ENTITY_DEF("nrtrie", 8941, "\xe2\x8b\xad"),
+               ENTITY_DEF("nsup", 8837, "\xe2\x8a\x85"),
+               ENTITY_DEF("Ubrcy", 1038, "\xd0\x8e"),
+               ENTITY_DEF("succnsim", 8937, "\xe2\x8b\xa9"),
+               ENTITY_DEF("nesim", 8770, "\xe2\x89\x82\xcc\xb8"),
+               ENTITY_DEF("varepsilon", 1013, "\xcf\xb5"),
+               ENTITY_DEF("DoubleRightTee", 8872, "\xe2\x8a\xa8"),
+               ENTITY_DEF("not", 172, "\xc2\xac"),
+               ENTITY_DEF("lesdot", 10879, "\xe2\xa9\xbf"),
+               ENTITY_DEF("backepsilon", 1014, "\xcf\xb6"),
+               ENTITY_DEF("srarr", 8594, "\xe2\x86\x92"),
+               ENTITY_DEF("varsubsetneqq", 10955, "\xe2\xab\x8b\xef\xb8\x80"),
+               ENTITY_DEF("sqcap", 8851, "\xe2\x8a\x93"),
+               ENTITY_DEF("rightleftarrows", 8644, "\xe2\x87\x84"),
+               ENTITY_DEF("diams", 9830, "\xe2\x99\xa6"),
+               ENTITY_DEF("boxdR", 9554, "\xe2\x95\x92"),
+               ENTITY_DEF("ngeqslant", 10878, "\xe2\xa9\xbe\xcc\xb8"),
+               ENTITY_DEF("boxDR", 9556, "\xe2\x95\x94"),
+               ENTITY_DEF("sext", 10038, "\xe2\x9c\xb6"),
+               ENTITY_DEF("backsim", 8765, "\xe2\x88\xbd"),
+               ENTITY_DEF("nfr", 120107, "\xf0\x9d\x94\xab"),
+               ENTITY_DEF("CloseCurlyDoubleQuote", 8221, "\xe2\x80\x9d"),
+               ENTITY_DEF("npart", 8706, "\xe2\x88\x82\xcc\xb8"),
+               ENTITY_DEF("dharl", 8643, "\xe2\x87\x83"),
+               ENTITY_DEF("NewLine", 10, "\x0a"),
+               ENTITY_DEF("bigotimes", 10754, "\xe2\xa8\x82"),
+               ENTITY_DEF("lAtail", 10523, "\xe2\xa4\x9b"),
+               ENTITY_DEF("frac14", 188, "\xc2\xbc"),
+               ENTITY_DEF("or", 8744, "\xe2\x88\xa8"),
+               ENTITY_DEF("subedot", 10947, "\xe2\xab\x83"),
+               ENTITY_DEF("nmid", 8740, "\xe2\x88\xa4"),
+               ENTITY_DEF("DownArrowUpArrow", 8693, "\xe2\x87\xb5"),
+               ENTITY_DEF("icy", 1080, "\xd0\xb8"),
+               ENTITY_DEF("num", 35, "\x23"),
+               ENTITY_DEF("Gdot", 288, "\xc4\xa0"),
+               ENTITY_DEF("urcrop", 8974, "\xe2\x8c\x8e"),
+               ENTITY_DEF("epsiv", 1013, "\xcf\xb5"),
+               ENTITY_DEF("topcir", 10993, "\xe2\xab\xb1"),
+               ENTITY_DEF("ne", 8800, "\xe2\x89\xa0"),
+               ENTITY_DEF("osol", 8856, "\xe2\x8a\x98"),
+               ENTITY_DEF("amp", 38, "\x26"),
+               ENTITY_DEF("ncap", 10819, "\xe2\xa9\x83"),
+               ENTITY_DEF("Sscr", 119982, "\xf0\x9d\x92\xae"),
+               ENTITY_DEF("sung", 9834, "\xe2\x99\xaa"),
+               ENTITY_DEF("ltri", 9667, "\xe2\x97\x83"),
+               ENTITY_DEF("frac25", 8534, "\xe2\x85\x96"),
+               ENTITY_DEF("DZcy", 1039, "\xd0\x8f"),
+               ENTITY_DEF("RightUpVector", 8638, "\xe2\x86\xbe"),
+               ENTITY_DEF("rsquor", 8217, "\xe2\x80\x99"),
+               ENTITY_DEF("uplus", 8846, "\xe2\x8a\x8e"),
+               ENTITY_DEF("triangleright", 9657, "\xe2\x96\xb9"),
+               ENTITY_DEF("lAarr", 8666, "\xe2\x87\x9a"),
+               ENTITY_DEF("HilbertSpace", 8459, "\xe2\x84\x8b"),
+               ENTITY_DEF("there4", 8756, "\xe2\x88\xb4"),
+               ENTITY_DEF("vscr", 120011, "\xf0\x9d\x93\x8b"),
+               ENTITY_DEF("cirscir", 10690, "\xe2\xa7\x82"),
+               ENTITY_DEF("roarr", 8702, "\xe2\x87\xbe"),
+               ENTITY_DEF("hslash", 8463, "\xe2\x84\x8f"),
+               ENTITY_DEF("supdsub", 10968, "\xe2\xab\x98"),
+               ENTITY_DEF("simg", 10910, "\xe2\xaa\x9e"),
+               ENTITY_DEF("trade", 8482, "\xe2\x84\xa2"),
+               ENTITY_DEF("searrow", 8600, "\xe2\x86\x98"),
+               ENTITY_DEF("DownLeftVector", 8637, "\xe2\x86\xbd"),
+               ENTITY_DEF("FilledSmallSquare", 9724, "\xe2\x97\xbc"),
+               ENTITY_DEF("prod", 8719, "\xe2\x88\x8f"),
+               ENTITY_DEF("oror", 10838, "\xe2\xa9\x96"),
+               ENTITY_DEF("udarr", 8645, "\xe2\x87\x85"),
+               ENTITY_DEF("jsercy", 1112, "\xd1\x98"),
+               ENTITY_DEF("tprime", 8244, "\xe2\x80\xb4"),
+               ENTITY_DEF("bprime", 8245, "\xe2\x80\xb5"),
+               ENTITY_DEF("malt", 10016, "\xe2\x9c\xa0"),
+               ENTITY_DEF("bigcup", 8899, "\xe2\x8b\x83"),
+               ENTITY_DEF("oint", 8750, "\xe2\x88\xae"),
+               ENTITY_DEF("female", 9792, "\xe2\x99\x80"),
+               ENTITY_DEF("omacr", 333, "\xc5\x8d"),
+               ENTITY_DEF("SquareSubsetEqual", 8849, "\xe2\x8a\x91"),
+               ENTITY_DEF("SucceedsEqual", 10928, "\xe2\xaa\xb0"),
+               ENTITY_DEF("plusacir", 10787, "\xe2\xa8\xa3"),
+               ENTITY_DEF("Gcirc", 284, "\xc4\x9c"),
+               ENTITY_DEF("lesdotor", 10883, "\xe2\xaa\x83"),
+               ENTITY_DEF("escr", 8495, "\xe2\x84\xaf"),
+               ENTITY_DEF("THORN", 222, "\xc3\x9e"),
+               ENTITY_DEF("UpArrowBar", 10514, "\xe2\xa4\x92"),
+               ENTITY_DEF("nvrtrie", 8885, "\xe2\x8a\xb5\xe2\x83\x92"),
+               ENTITY_DEF("varkappa", 1008, "\xcf\xb0"),
+               ENTITY_DEF("NotReverseElement", 8716, "\xe2\x88\x8c"),
+               ENTITY_DEF("zdot", 380, "\xc5\xbc"),
+               ENTITY_DEF("ExponentialE", 8519, "\xe2\x85\x87"),
+               ENTITY_DEF("lesseqgtr", 8922, "\xe2\x8b\x9a"),
+               ENTITY_DEF("cscr", 119992, "\xf0\x9d\x92\xb8"),
+               ENTITY_DEF("Dscr", 119967, "\xf0\x9d\x92\x9f"),
+               ENTITY_DEF("lthree", 8907, "\xe2\x8b\x8b"),
+               ENTITY_DEF("Ccedil", 199, "\xc3\x87"),
+               ENTITY_DEF("nge", 8817, "\xe2\x89\xb1"),
+               ENTITY_DEF("UpperLeftArrow", 8598, "\xe2\x86\x96"),
+               ENTITY_DEF("vDash", 8872, "\xe2\x8a\xa8"),
+               ENTITY_DEF("efDot", 8786, "\xe2\x89\x92"),
+               ENTITY_DEF("telrec", 8981, "\xe2\x8c\x95"),
+               ENTITY_DEF("vellip", 8942, "\xe2\x8b\xae"),
+               ENTITY_DEF("nrArr", 8655, "\xe2\x87\x8f"),
+               ENTITY_DEF("ugrave", 249, "\xc3\xb9"),
+               ENTITY_DEF("uring", 367, "\xc5\xaf"),
+               ENTITY_DEF("Bernoullis", 8492, "\xe2\x84\xac"),
+               ENTITY_DEF("nles", 10877, "\xe2\xa9\xbd\xcc\xb8"),
+               ENTITY_DEF("macr", 175, "\xc2\xaf"),
+               ENTITY_DEF("boxuR", 9560, "\xe2\x95\x98"),
+               ENTITY_DEF("clubsuit", 9827, "\xe2\x99\xa3"),
+               ENTITY_DEF("rightarrowtail", 8611, "\xe2\x86\xa3"),
+               ENTITY_DEF("epar", 8917, "\xe2\x8b\x95"),
+               ENTITY_DEF("ltcc", 10918, "\xe2\xaa\xa6"),
+               ENTITY_DEF("twoheadleftarrow", 8606, "\xe2\x86\x9e"),
+               ENTITY_DEF("aleph", 8501, "\xe2\x84\xb5"),
+               ENTITY_DEF("Colon", 8759, "\xe2\x88\xb7"),
+               ENTITY_DEF("vltri", 8882, "\xe2\x8a\xb2"),
+               ENTITY_DEF("quaternions", 8461, "\xe2\x84\x8d"),
+               ENTITY_DEF("rfr", 120111, "\xf0\x9d\x94\xaf"),
+               ENTITY_DEF("Ouml", 214, "\xc3\x96"),
+               ENTITY_DEF("rsh", 8625, "\xe2\x86\xb1"),
+               ENTITY_DEF("emptyv", 8709, "\xe2\x88\x85"),
+               ENTITY_DEF("sqsup", 8848, "\xe2\x8a\x90"),
+               ENTITY_DEF("marker", 9646, "\xe2\x96\xae"),
+               ENTITY_DEF("Efr", 120072, "\xf0\x9d\x94\x88"),
+               ENTITY_DEF("DotEqual", 8784, "\xe2\x89\x90"),
+               ENTITY_DEF("eqsim", 8770, "\xe2\x89\x82"),
+               ENTITY_DEF("NotSucceedsEqual", 10928, "\xe2\xaa\xb0\xcc\xb8"),
+               ENTITY_DEF("primes", 8473, "\xe2\x84\x99"),
+               ENTITY_DEF("times", 215, "\xc3\x97"),
+               ENTITY_DEF("rangd", 10642, "\xe2\xa6\x92"),
+               ENTITY_DEF("rightharpoonup", 8640, "\xe2\x87\x80"),
+               ENTITY_DEF("lrhard", 10605, "\xe2\xa5\xad"),
+               ENTITY_DEF("ape", 8778, "\xe2\x89\x8a"),
+               ENTITY_DEF("varsupsetneq", 8843, "\xe2\x8a\x8b\xef\xb8\x80"),
+               ENTITY_DEF("larrlp", 8619, "\xe2\x86\xab"),
+               ENTITY_DEF("NotPrecedesEqual", 10927, "\xe2\xaa\xaf\xcc\xb8"),
+               ENTITY_DEF("ulcorner", 8988, "\xe2\x8c\x9c"),
+               ENTITY_DEF("acd", 8767, "\xe2\x88\xbf"),
+               ENTITY_DEF("Hacek", 711, "\xcb\x87"),
+               ENTITY_DEF("xuplus", 10756, "\xe2\xa8\x84"),
+               ENTITY_DEF("therefore", 8756, "\xe2\x88\xb4"),
+               ENTITY_DEF("YIcy", 1031, "\xd0\x87"),
+               ENTITY_DEF("Tfr", 120087, "\xf0\x9d\x94\x97"),
+               ENTITY_DEF("Jcirc", 308, "\xc4\xb4"),
+               ENTITY_DEF("LessGreater", 8822, "\xe2\x89\xb6"),
+               ENTITY_DEF("Uring", 366, "\xc5\xae"),
+               ENTITY_DEF("Ugrave", 217, "\xc3\x99"),
+               ENTITY_DEF("rarr", 8594, "\xe2\x86\x92"),
+               ENTITY_DEF("wopf", 120168, "\xf0\x9d\x95\xa8"),
+               ENTITY_DEF("imath", 305, "\xc4\xb1"),
+               ENTITY_DEF("Yopf", 120144, "\xf0\x9d\x95\x90"),
+               ENTITY_DEF("colone", 8788, "\xe2\x89\x94"),
+               ENTITY_DEF("csube", 10961, "\xe2\xab\x91"),
+               ENTITY_DEF("odash", 8861, "\xe2\x8a\x9d"),
+               ENTITY_DEF("olarr", 8634, "\xe2\x86\xba"),
+               ENTITY_DEF("angrt", 8735, "\xe2\x88\x9f"),
+               ENTITY_DEF("NotLeftTriangleBar", 10703, "\xe2\xa7\x8f\xcc\xb8"),
+               ENTITY_DEF("GreaterEqual", 8805, "\xe2\x89\xa5"),
+               ENTITY_DEF("scnap", 10938, "\xe2\xaa\xba"),
+               ENTITY_DEF("pi", 960, "\xcf\x80"),
+               ENTITY_DEF("lesg", 8922, "\xe2\x8b\x9a\xef\xb8\x80"),
+               ENTITY_DEF("orderof", 8500, "\xe2\x84\xb4"),
+               ENTITY_DEF("uacute", 250, "\xc3\xba"),
+               ENTITY_DEF("Barv", 10983, "\xe2\xab\xa7"),
+               ENTITY_DEF("Theta", 920, "\xce\x98"),
+               ENTITY_DEF("leftrightsquigarrow", 8621, "\xe2\x86\xad"),
+               ENTITY_DEF("Atilde", 195, "\xc3\x83"),
+               ENTITY_DEF("cupdot", 8845, "\xe2\x8a\x8d"),
+               ENTITY_DEF("ntriangleright", 8939, "\xe2\x8b\xab"),
+               ENTITY_DEF("measuredangle", 8737, "\xe2\x88\xa1"),
+               ENTITY_DEF("jscr", 119999, "\xf0\x9d\x92\xbf"),
+               ENTITY_DEF("inodot", 305, "\xc4\xb1"),
+               ENTITY_DEF("mopf", 120158, "\xf0\x9d\x95\x9e"),
+               ENTITY_DEF("hkswarow", 10534, "\xe2\xa4\xa6"),
+               ENTITY_DEF("lopar", 10629, "\xe2\xa6\x85"),
+               ENTITY_DEF("thksim", 8764, "\xe2\x88\xbc"),
+               ENTITY_DEF("bkarow", 10509, "\xe2\xa4\x8d"),
+               ENTITY_DEF("rarrfs", 10526, "\xe2\xa4\x9e"),
+               ENTITY_DEF("ntrianglelefteq", 8940, "\xe2\x8b\xac"),
+               ENTITY_DEF("Bscr", 8492, "\xe2\x84\xac"),
+               ENTITY_DEF("topf", 120165, "\xf0\x9d\x95\xa5"),
+               ENTITY_DEF("Uacute", 218, "\xc3\x9a"),
+               ENTITY_DEF("lap", 10885, "\xe2\xaa\x85"),
+               ENTITY_DEF("djcy", 1106, "\xd1\x92"),
+               ENTITY_DEF("bopf", 120147, "\xf0\x9d\x95\x93"),
+               ENTITY_DEF("empty", 8709, "\xe2\x88\x85"),
+               ENTITY_DEF("LeftAngleBracket", 10216, "\xe2\x9f\xa8"),
+               ENTITY_DEF("Imacr", 298, "\xc4\xaa"),
+               ENTITY_DEF("ltcir", 10873, "\xe2\xa9\xb9"),
+               ENTITY_DEF("trisb", 10701, "\xe2\xa7\x8d"),
+               ENTITY_DEF("gjcy", 1107, "\xd1\x93"),
+               ENTITY_DEF("pr", 8826, "\xe2\x89\xba"),
+               ENTITY_DEF("Mu", 924, "\xce\x9c"),
+               ENTITY_DEF("ogon", 731, "\xcb\x9b"),
+               ENTITY_DEF("pertenk", 8241, "\xe2\x80\xb1"),
+               ENTITY_DEF("plustwo", 10791, "\xe2\xa8\xa7"),
+               ENTITY_DEF("Vfr", 120089, "\xf0\x9d\x94\x99"),
+               ENTITY_DEF("ApplyFunction", 8289, "\xe2\x81\xa1"),
+               ENTITY_DEF("Sub", 8912, "\xe2\x8b\x90"),
+               ENTITY_DEF("DoubleLeftRightArrow", 8660, "\xe2\x87\x94"),
+               ENTITY_DEF("Lmidot", 319, "\xc4\xbf"),
+               ENTITY_DEF("nwarrow", 8598, "\xe2\x86\x96"),
+               ENTITY_DEF("angrtvbd", 10653, "\xe2\xa6\x9d"),
+               ENTITY_DEF("fcy", 1092, "\xd1\x84"),
+               ENTITY_DEF("ltlarr", 10614, "\xe2\xa5\xb6"),
+               ENTITY_DEF("CircleMinus", 8854, "\xe2\x8a\x96"),
+               ENTITY_DEF("angmsdab", 10665, "\xe2\xa6\xa9"),
+               ENTITY_DEF("wedgeq", 8793, "\xe2\x89\x99"),
+               ENTITY_DEF("iogon", 303, "\xc4\xaf"),
+               ENTITY_DEF("laquo", 171, "\xc2\xab"),
+               ENTITY_DEF("NestedGreaterGreater", 8811, "\xe2\x89\xab"),
+               ENTITY_DEF("UnionPlus", 8846, "\xe2\x8a\x8e"),
+               ENTITY_DEF("CircleDot", 8857, "\xe2\x8a\x99"),
+               ENTITY_DEF("coloneq", 8788, "\xe2\x89\x94"),
+               ENTITY_DEF("csupe", 10962, "\xe2\xab\x92"),
+               ENTITY_DEF("tcaron", 357, "\xc5\xa5"),
+               ENTITY_DEF("GreaterTilde", 8819, "\xe2\x89\xb3"),
+               ENTITY_DEF("Map", 10501, "\xe2\xa4\x85"),
+               ENTITY_DEF("DoubleLongLeftArrow", 10232, "\xe2\x9f\xb8"),
+               ENTITY_DEF("Uparrow", 8657, "\xe2\x87\x91"),
+               ENTITY_DEF("scy", 1089, "\xd1\x81"),
+               ENTITY_DEF("llarr", 8647, "\xe2\x87\x87"),
+               ENTITY_DEF("rangle", 10217, "\xe2\x9f\xa9"),
+               ENTITY_DEF("sstarf", 8902, "\xe2\x8b\x86"),
+               ENTITY_DEF("InvisibleTimes", 8290, "\xe2\x81\xa2"),
+               ENTITY_DEF("egsdot", 10904, "\xe2\xaa\x98"),
+               ENTITY_DEF("target", 8982, "\xe2\x8c\x96"),
+               ENTITY_DEF("lesges", 10899, "\xe2\xaa\x93"),
+               ENTITY_DEF("curren", 164, "\xc2\xa4"),
+               ENTITY_DEF("yopf", 120170, "\xf0\x9d\x95\xaa"),
+               ENTITY_DEF("frac23", 8532, "\xe2\x85\x94"),
+               ENTITY_DEF("NotSucceedsTilde", 8831, "\xe2\x89\xbf\xcc\xb8"),
+               ENTITY_DEF("napprox", 8777, "\xe2\x89\x89"),
+               ENTITY_DEF("odblac", 337, "\xc5\x91"),
+               ENTITY_DEF("gammad", 989, "\xcf\x9d"),
+               ENTITY_DEF("dscr", 119993, "\xf0\x9d\x92\xb9"),
+               ENTITY_DEF("SupersetEqual", 8839, "\xe2\x8a\x87"),
+               ENTITY_DEF("squf", 9642, "\xe2\x96\xaa"),
+               ENTITY_DEF("Because", 8757, "\xe2\x88\xb5"),
+               ENTITY_DEF("sccue", 8829, "\xe2\x89\xbd"),
+               ENTITY_DEF("KHcy", 1061, "\xd0\xa5"),
+               ENTITY_DEF("Wcirc", 372, "\xc5\xb4"),
+               ENTITY_DEF("uparrow", 8593, "\xe2\x86\x91"),
+               ENTITY_DEF("lessgtr", 8822, "\xe2\x89\xb6"),
+               ENTITY_DEF("thickapprox", 8776, "\xe2\x89\x88"),
+               ENTITY_DEF("lbrksld", 10639, "\xe2\xa6\x8f"),
+               ENTITY_DEF("oslash", 248, "\xc3\xb8"),
+               ENTITY_DEF("NotCupCap", 8813, "\xe2\x89\xad"),
+               ENTITY_DEF("elinters", 9191, "\xe2\x8f\xa7"),
+               ENTITY_DEF("Assign", 8788, "\xe2\x89\x94"),
+               ENTITY_DEF("ClockwiseContourIntegral", 8754, "\xe2\x88\xb2"),
+               ENTITY_DEF("lfisht", 10620, "\xe2\xa5\xbc"),
+               ENTITY_DEF("DownArrow", 8595, "\xe2\x86\x93"),
+               ENTITY_DEF("Zdot", 379, "\xc5\xbb"),
+               ENTITY_DEF("xscr", 120013, "\xf0\x9d\x93\x8d"),
+               ENTITY_DEF("DiacriticalGrave", 96, "\x60"),
+               ENTITY_DEF("DoubleLongLeftRightArrow", 10234, "\xe2\x9f\xba"),
+               ENTITY_DEF("angle", 8736, "\xe2\x88\xa0"),
+               ENTITY_DEF("race", 8765, "\xe2\x88\xbd\xcc\xb1"),
+               ENTITY_DEF("Ascr", 119964, "\xf0\x9d\x92\x9c"),
+               ENTITY_DEF("Xscr", 119987, "\xf0\x9d\x92\xb3"),
+               ENTITY_DEF("acirc", 226, "\xc3\xa2"),
+               ENTITY_DEF("otimesas", 10806, "\xe2\xa8\xb6"),
+               ENTITY_DEF("gscr", 8458, "\xe2\x84\x8a"),
+               ENTITY_DEF("gcy", 1075, "\xd0\xb3"),
+               ENTITY_DEF("angmsdag", 10670, "\xe2\xa6\xae"),
+               ENTITY_DEF("tshcy", 1115, "\xd1\x9b"),
+               ENTITY_DEF("Acy", 1040, "\xd0\x90"),
+               ENTITY_DEF("NotGreaterLess", 8825, "\xe2\x89\xb9"),
+               ENTITY_DEF("dtdot", 8945, "\xe2\x8b\xb1"),
+               ENTITY_DEF("quot", 34, "\x22"),
+               ENTITY_DEF("micro", 181, "\xc2\xb5"),
+               ENTITY_DEF("simplus", 10788, "\xe2\xa8\xa4"),
+               ENTITY_DEF("nsupseteq", 8841, "\xe2\x8a\x89"),
+               ENTITY_DEF("Ufr", 120088, "\xf0\x9d\x94\x98"),
+               ENTITY_DEF("Pr", 10939, "\xe2\xaa\xbb"),
+               ENTITY_DEF("napid", 8779, "\xe2\x89\x8b\xcc\xb8"),
+               ENTITY_DEF("rceil", 8969, "\xe2\x8c\x89"),
+               ENTITY_DEF("boxtimes", 8864, "\xe2\x8a\xa0"),
+               ENTITY_DEF("erarr", 10609, "\xe2\xa5\xb1"),
+               ENTITY_DEF("downdownarrows", 8650, "\xe2\x87\x8a"),
+               ENTITY_DEF("Kfr", 120078, "\xf0\x9d\x94\x8e"),
+               ENTITY_DEF("mho", 8487, "\xe2\x84\xa7"),
+               ENTITY_DEF("scpolint", 10771, "\xe2\xa8\x93"),
+               ENTITY_DEF("vArr", 8661, "\xe2\x87\x95"),
+               ENTITY_DEF("Ccaron", 268, "\xc4\x8c"),
+               ENTITY_DEF("NotRightTriangle", 8939, "\xe2\x8b\xab"),
+               ENTITY_DEF("topbot", 9014, "\xe2\x8c\xb6"),
+               ENTITY_DEF("qopf", 120162, "\xf0\x9d\x95\xa2"),
+               ENTITY_DEF("eogon", 281, "\xc4\x99"),
+               ENTITY_DEF("luruhar", 10598, "\xe2\xa5\xa6"),
+               ENTITY_DEF("gtdot", 8919, "\xe2\x8b\x97"),
+               ENTITY_DEF("Egrave", 200, "\xc3\x88"),
+               ENTITY_DEF("roplus", 10798, "\xe2\xa8\xae"),
+               ENTITY_DEF("Intersection", 8898, "\xe2\x8b\x82"),
+               ENTITY_DEF("Uarr", 8607, "\xe2\x86\x9f"),
+               ENTITY_DEF("dcy", 1076, "\xd0\xb4"),
+               ENTITY_DEF("boxvl", 9508, "\xe2\x94\xa4"),
+               ENTITY_DEF("RightArrowBar", 8677, "\xe2\x87\xa5"),
+               ENTITY_DEF("yuml", 255, "\xc3\xbf"),
+               ENTITY_DEF("parallel", 8741, "\xe2\x88\xa5"),
+               ENTITY_DEF("succneqq", 10934, "\xe2\xaa\xb6"),
+               ENTITY_DEF("bemptyv", 10672, "\xe2\xa6\xb0"),
+               ENTITY_DEF("starf", 9733, "\xe2\x98\x85"),
+               ENTITY_DEF("OverBar", 8254, "\xe2\x80\xbe"),
+               ENTITY_DEF("Alpha", 913, "\xce\x91"),
+               ENTITY_DEF("LeftUpVectorBar", 10584, "\xe2\xa5\x98"),
+               ENTITY_DEF("ufr", 120114, "\xf0\x9d\x94\xb2"),
+               ENTITY_DEF("swarhk", 10534, "\xe2\xa4\xa6"),
+               ENTITY_DEF("GreaterEqualLess", 8923, "\xe2\x8b\x9b"),
+               ENTITY_DEF("sscr", 120008, "\xf0\x9d\x93\x88"),
+               ENTITY_DEF("Pi", 928, "\xce\xa0"),
+               ENTITY_DEF("boxh", 9472, "\xe2\x94\x80"),
+               ENTITY_DEF("frac16", 8537, "\xe2\x85\x99"),
+               ENTITY_DEF("lbrack", 91, "\x5b"),
+               ENTITY_DEF("vert", 124, "\x7c"),
+               ENTITY_DEF("precneqq", 10933, "\xe2\xaa\xb5"),
+               ENTITY_DEF("NotGreaterSlantEqual", 10878, "\xe2\xa9\xbe\xcc\xb8"),
+               ENTITY_DEF("Omega", 937, "\xce\xa9"),
+               ENTITY_DEF("uarr", 8593, "\xe2\x86\x91"),
+               ENTITY_DEF("boxVr", 9567, "\xe2\x95\x9f"),
+               ENTITY_DEF("ruluhar", 10600, "\xe2\xa5\xa8"),
+               ENTITY_DEF("ShortLeftArrow", 8592, "\xe2\x86\x90"),
+               ENTITY_DEF("Qfr", 120084, "\xf0\x9d\x94\x94"),
+               ENTITY_DEF("olt", 10688, "\xe2\xa7\x80"),
+               ENTITY_DEF("nequiv", 8802, "\xe2\x89\xa2"),
+               ENTITY_DEF("fscr", 119995, "\xf0\x9d\x92\xbb"),
+               ENTITY_DEF("rarrhk", 8618, "\xe2\x86\xaa"),
+               ENTITY_DEF("nsqsupe", 8931, "\xe2\x8b\xa3"),
+               ENTITY_DEF("nsubseteq", 8840, "\xe2\x8a\x88"),
+               ENTITY_DEF("numero", 8470, "\xe2\x84\x96"),
+               ENTITY_DEF("emsp14", 8197, "\xe2\x80\x85"),
+               ENTITY_DEF("gl", 8823, "\xe2\x89\xb7"),
+               ENTITY_DEF("ocirc", 244, "\xc3\xb4"),
+               ENTITY_DEF("weierp", 8472, "\xe2\x84\x98"),
+               ENTITY_DEF("boxvL", 9569, "\xe2\x95\xa1"),
+               ENTITY_DEF("RightArrowLeftArrow", 8644, "\xe2\x87\x84"),
+               ENTITY_DEF("Precedes", 8826, "\xe2\x89\xba"),
+               ENTITY_DEF("RightVector", 8640, "\xe2\x87\x80"),
+               ENTITY_DEF("xcup", 8899, "\xe2\x8b\x83"),
+               ENTITY_DEF("angmsdad", 10667, "\xe2\xa6\xab"),
+               ENTITY_DEF("gtrsim", 8819, "\xe2\x89\xb3"),
+               ENTITY_DEF("natural", 9838, "\xe2\x99\xae"),
+               ENTITY_DEF("nVdash", 8878, "\xe2\x8a\xae"),
+               ENTITY_DEF("RightTriangleEqual", 8885, "\xe2\x8a\xb5"),
+               ENTITY_DEF("dscy", 1109, "\xd1\x95"),
+               ENTITY_DEF("leftthreetimes", 8907, "\xe2\x8b\x8b"),
+               ENTITY_DEF("prsim", 8830, "\xe2\x89\xbe"),
+               ENTITY_DEF("Bcy", 1041, "\xd0\x91"),
+               ENTITY_DEF("Chi", 935, "\xce\xa7"),
+               ENTITY_DEF("timesb", 8864, "\xe2\x8a\xa0"),
+               ENTITY_DEF("Del", 8711, "\xe2\x88\x87"),
+               ENTITY_DEF("lmidot", 320, "\xc5\x80"),
+               ENTITY_DEF("RightDownVector", 8642, "\xe2\x87\x82"),
+               ENTITY_DEF("simdot", 10858, "\xe2\xa9\xaa"),
+               ENTITY_DEF("FilledVerySmallSquare", 9642, "\xe2\x96\xaa"),
+               ENTITY_DEF("NotLessSlantEqual", 10877, "\xe2\xa9\xbd\xcc\xb8"),
+               ENTITY_DEF("SucceedsTilde", 8831, "\xe2\x89\xbf"),
+               ENTITY_DEF("duarr", 8693, "\xe2\x87\xb5"),
+               ENTITY_DEF("apE", 10864, "\xe2\xa9\xb0"),
+               ENTITY_DEF("odot", 8857, "\xe2\x8a\x99"),
+               ENTITY_DEF("mldr", 8230, "\xe2\x80\xa6"),
+               ENTITY_DEF("Uarrocir", 10569, "\xe2\xa5\x89"),
+               ENTITY_DEF("nLl", 8920, "\xe2\x8b\x98\xcc\xb8"),
+               ENTITY_DEF("rarrpl", 10565, "\xe2\xa5\x85"),
+               ENTITY_DEF("cir", 9675, "\xe2\x97\x8b"),
+               ENTITY_DEF("blk14", 9617, "\xe2\x96\x91"),
+               ENTITY_DEF("VerticalLine", 124, "\x7c"),
+               ENTITY_DEF("jcy", 1081, "\xd0\xb9"),
+               ENTITY_DEF("filig", 64257, "\xef\xac\x81"),
+               ENTITY_DEF("LongRightArrow", 10230, "\xe2\x9f\xb6"),
+               ENTITY_DEF("beta", 946, "\xce\xb2"),
+               ENTITY_DEF("ccupssm", 10832, "\xe2\xa9\x90"),
+               ENTITY_DEF("supsub", 10964, "\xe2\xab\x94"),
+               ENTITY_DEF("spar", 8741, "\xe2\x88\xa5"),
+               ENTITY_DEF("Tstrok", 358, "\xc5\xa6"),
+               ENTITY_DEF("isinv", 8712, "\xe2\x88\x88"),
+               ENTITY_DEF("rightsquigarrow", 8605, "\xe2\x86\x9d"),
+               ENTITY_DEF("Diamond", 8900, "\xe2\x8b\x84"),
+               ENTITY_DEF("curlyeqsucc", 8927, "\xe2\x8b\x9f"),
+               ENTITY_DEF("ijlig", 307, "\xc4\xb3"),
+               ENTITY_DEF("puncsp", 8200, "\xe2\x80\x88"),
+               ENTITY_DEF("hamilt", 8459, "\xe2\x84\x8b"),
+               ENTITY_DEF("mapstoleft", 8612, "\xe2\x86\xa4"),
+               ENTITY_DEF("Copf", 8450, "\xe2\x84\x82"),
+               ENTITY_DEF("prnsim", 8936, "\xe2\x8b\xa8"),
+               ENTITY_DEF("DotDot", 8412, "\xe2\x83\x9c"),
+               ENTITY_DEF("lobrk", 10214, "\xe2\x9f\xa6"),
+               ENTITY_DEF("twoheadrightarrow", 8608, "\xe2\x86\xa0"),
+               ENTITY_DEF("ngE", 8807, "\xe2\x89\xa7\xcc\xb8"),
+               ENTITY_DEF("cylcty", 9005, "\xe2\x8c\xad"),
+               ENTITY_DEF("sube", 8838, "\xe2\x8a\x86"),
+               ENTITY_DEF("NotEqualTilde", 8770, "\xe2\x89\x82\xcc\xb8"),
+               ENTITY_DEF("Yuml", 376, "\xc5\xb8"),
+               ENTITY_DEF("comp", 8705, "\xe2\x88\x81"),
+               ENTITY_DEF("dotminus", 8760, "\xe2\x88\xb8"),
+               ENTITY_DEF("crarr", 8629, "\xe2\x86\xb5"),
+               ENTITY_DEF("imped", 437, "\xc6\xb5"),
+               ENTITY_DEF("barwedge", 8965, "\xe2\x8c\x85"),
+               ENTITY_DEF("harrcir", 10568, "\xe2\xa5\x88")
+);
+
+class html_entities_storage {
+       robin_hood::unordered_flat_map<std::string_view, html_entity_def> entity_by_name;
+       robin_hood::unordered_flat_map<unsigned, html_entity_def> entity_by_id;
+public:
+       html_entities_storage() {
+               entity_by_name.reserve(html_entities_array.size());
+               entity_by_id.reserve(html_entities_array.size());
+
+               for (const auto &e : html_entities_array) {
+                       entity_by_name[e.name] = e;
+                       entity_by_id[e.code] = e;
+               }
+       }
+
+       auto by_name(std::string_view name) const -> const html_entity_def * {
+               auto it = entity_by_name.find(name);
+
+               if (it != entity_by_name.end()) {
+                       return &(it->second);
+               }
+
+               return nullptr;
+       }
+
+       auto by_id(int id) const -> const html_entity_def * {
+               auto it = entity_by_id.find(id);
+               if (it != entity_by_id.end()) {
+                       return &(it->second);
+               }
+
+               return nullptr;
+       }
+};
+
+static const html_entities_storage html_entities_defs;
+
+std::size_t
+decode_html_entitles_inplace(char *s, std::size_t len)
+{
+       long l, rep_len;
+       char *t = s, *h = s, *e = s, *end_ptr, old_c;
+       const gchar *end;
+       const gchar *entity;
+       bool seen_hash = false, seen_hex = false;
+       enum {
+               do_undefined,
+               do_digits_only,
+               do_mixed,
+       } seen_digit_only;
+       int state = 0, base;
+       UChar32 uc;
+
+       if (len == 0) {
+               return 0;
+       }
+       else {
+               l = len;
+       }
+
+       end = s + l;
+
+       while (h - s < l && t <= h) {
+               switch (state) {
+                       /* Out of entity */
+               case 0:
+                       if (*h == '&') {
+                               state = 1;
+                               seen_hash = false;
+                               seen_hex = false;
+                               seen_digit_only = do_undefined;
+                               e = h;
+                               h++;
+                               continue;
+                       }
+                       else {
+                               *t = *h;
+                               h++;
+                               t++;
+                       }
+                       break;
+               case 1:
+                       if (*h == ';' && h > e) {
+decode_entity:
+                               old_c = *h;
+                               *h = '\0';
+                               entity = e + 1;
+                               uc = 0;
+
+                               if (*entity != '#') {
+                                       const auto *entity_def = html_entities_defs.by_name({entity,
+                                                                                                                                                (std::size_t) (h - entity)});
+                                       *h = old_c;
+
+                                       if (entity_def) {
+                                               rep_len = entity_def->replacement.size();
+
+                                               if (end - t >= rep_len) {
+                                                       memcpy(t, entity_def->replacement.data(),
+                                                                       rep_len);
+                                                       t += rep_len;
+                                               }
+                                       }
+                                       else {
+                                               if (end - t > h - e + 1) {
+                                                       memmove(t, e, h - e + 1);
+                                                       t += h - e + 1;
+                                               }
+                                       }
+                               }
+                               else if (e + 2 < h) {
+                                       if (*(e + 2) == 'x' || *(e + 2) == 'X') {
+                                               base = 16;
+                                       }
+                                       else if (*(e + 2) == 'o' || *(e + 2) == 'O') {
+                                               base = 8;
+                                       }
+                                       else {
+                                               base = 10;
+                                       }
+
+                                       if (base == 10) {
+                                               uc = strtoul((e + 2), &end_ptr, base);
+                                       }
+                                       else {
+                                               uc = strtoul((e + 3), &end_ptr, base);
+                                       }
+
+                                       if (end_ptr != nullptr && *end_ptr != '\0') {
+                                               /* Skip undecoded */
+                                               *h = old_c;
+
+                                               if (end - t > h - e + 1) {
+                                                       memmove(t, e, h - e + 1);
+                                                       t += h - e + 1;
+                                               }
+                                       }
+                                       else {
+                                               /* Search for a replacement */
+                                               *h = old_c;
+                                               const auto *entity_def = html_entities_defs.by_id(uc);
+
+                                               if (entity_def) {
+                                                       rep_len = entity_def->replacement.size();
+
+                                                       if (end - t >= rep_len) {
+                                                               memcpy(t, entity_def->replacement.data(),
+                                                                               rep_len);
+                                                               t += rep_len;
+                                                       }
+                                               }
+                                               else {
+                                                       /* Unicode point */
+                                                       goffset off = t - s;
+                                                       UBool is_error = 0;
+
+                                                       if (uc > 0) {
+                                                               U8_APPEND (s, off, len, uc, is_error);
+                                                               if (!is_error) {
+                                                                       t = s + off;
+                                                               }
+                                                               else {
+                                                                       /* Leave invalid entities as is */
+                                                                       if (end - t > h - e + 1) {
+                                                                               memmove(t, e, h - e + 1);
+                                                                               t += h - e + 1;
+                                                                       }
+                                                               }
+                                                       }
+                                                       else if (end - t > h - e + 1) {
+                                                               memmove(t, e, h - e + 1);
+                                                               t += h - e + 1;
+                                                       }
+                                               }
+
+                                               if (end - t > 0 && old_c != ';') {
+                                                       /* Fuck email clients, fuck them */
+                                                       *t++ = old_c;
+                                               }
+                                       }
+                               }
+
+                               state = 0;
+                       }
+                       else if (*h == '&') {
+                               /* Previous `&` was bogus */
+                               state = 1;
+
+                               if (end - t > h - e) {
+                                       memmove(t, e, h - e);
+                                       t += h - e;
+                               }
+
+                               e = h;
+                       }
+                       else if (*h == '#') {
+                               seen_hash = true;
+
+                               if (h + 1 < end && h[1] == 'x') {
+                                       seen_hex = true;
+                                       /* Skip one more character */
+                                       h++;
+                               }
+                       }
+                       else if (seen_digit_only != do_mixed &&
+                                        (g_ascii_isdigit (*h) || (seen_hex && g_ascii_isxdigit (*h)))) {
+                               seen_digit_only = do_digits_only;
+                       }
+                       else {
+                               if (seen_digit_only == do_digits_only && seen_hash && h > e) {
+                                       /* We have seen some digits, so we can try to decode, eh */
+                                       /* Fuck retarded email clients... */
+                                       goto decode_entity;
+                               }
+
+                               seen_digit_only = do_mixed;
+                       }
+
+                       h++;
+
+                       break;
+               }
+       }
+
+       /* Leftover */
+       if (state == 1 && h > e) {
+               /* Unfinished entity, copy as is */
+               if (end - t >= h - e) {
+                       memmove(t, e, h - e);
+                       t += h - e;
+               }
+       }
+
+       return (t - s);
+}
+
+} // namespace rspamd::html
\ No newline at end of file
index 4953a0bf97023bcbcfb149c7a9a74923dc080ca0..9e48c20a0194fc9a79c0940e92fc570ce6d2b810 100644 (file)
@@ -1,5 +1,5 @@
 /*-
- * Copyright 2018 Vsevolod Stakhov
+ * Copyright 2021 Vsevolod Stakhov
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
 
 #ifndef RSPAMD_HTML_ENTITIES_H
 #define RSPAMD_HTML_ENTITIES_H
+#pragma once
 
-#include <string>
-#include <contrib/robin-hood/robin_hood.h>
+#include <utility>
 
 namespace rspamd::html {
 
-struct html_entity_def {
-       std::string name;
-       std::string replacement;
-       unsigned code;
-};
-
-#define ENTITY_DEF(name, code, replacement) html_entity_def{(name), (replacement), (code)}
-
-static const auto html_entities_array = rspamd::array_of<html_entity_def>(
-               ENTITY_DEF("szlig", 223, "\xc3\x9f"),
-               ENTITY_DEF("prime", 8242, "\xe2\x80\xb2"),
-               ENTITY_DEF("lnsim", 8934, "\xe2\x8b\xa6"),
-               ENTITY_DEF("nvDash", 8877, "\xe2\x8a\xad"),
-               ENTITY_DEF("isinsv", 8947, "\xe2\x8b\xb3"),
-               ENTITY_DEF("notin", 8713, "\xe2\x88\x89"),
-               ENTITY_DEF("becaus", 8757, "\xe2\x88\xb5"),
-               ENTITY_DEF("Leftrightarrow", 8660, "\xe2\x87\x94"),
-               ENTITY_DEF("EmptySmallSquare", 9723, "\xe2\x97\xbb"),
-               ENTITY_DEF("SquareUnion", 8852, "\xe2\x8a\x94"),
-               ENTITY_DEF("subdot", 10941, "\xe2\xaa\xbd"),
-               ENTITY_DEF("Dstrok", 272, "\xc4\x90"),
-               ENTITY_DEF("rrarr", 8649, "\xe2\x87\x89"),
-               ENTITY_DEF("rArr", 8658, "\xe2\x87\x92"),
-               ENTITY_DEF("Aacute", 193, "\xc3\x81"),
-               ENTITY_DEF("kappa", 954, "\xce\xba"),
-               ENTITY_DEF("Iopf", 120128, "\xf0\x9d\x95\x80"),
-               ENTITY_DEF("hyphen", 8208, "\xe2\x80\x90"),
-               ENTITY_DEF("rarrbfs", 10528, "\xe2\xa4\xa0"),
-               ENTITY_DEF("supsetneqq", 10956, "\xe2\xab\x8c"),
-               ENTITY_DEF("gacute", 501, "\xc7\xb5"),
-               ENTITY_DEF("VeryThinSpace", 8202, "\xe2\x80\x8a"),
-               ENTITY_DEF("tint", 8749, "\xe2\x88\xad"),
-               ENTITY_DEF("ffr", 120099, "\xf0\x9d\x94\xa3"),
-               ENTITY_DEF("kgreen", 312, "\xc4\xb8"),
-               ENTITY_DEF("nis", 8956, "\xe2\x8b\xbc"),
-               ENTITY_DEF("NotRightTriangleBar", 10704, "\xe2\xa7\x90\xcc\xb8"),
-               ENTITY_DEF("Eogon", 280, "\xc4\x98"),
-               ENTITY_DEF("lbrke", 10635, "\xe2\xa6\x8b"),
-               ENTITY_DEF("phi", 966, "\xcf\x86"),
-               ENTITY_DEF("notnivc", 8957, "\xe2\x8b\xbd"),
-               ENTITY_DEF("utilde", 361, "\xc5\xa9"),
-               ENTITY_DEF("Fopf", 120125, "\xf0\x9d\x94\xbd"),
-               ENTITY_DEF("Vcy", 1042, "\xd0\x92"),
-               ENTITY_DEF("erDot", 8787, "\xe2\x89\x93"),
-               ENTITY_DEF("nsubE", 10949, "\xe2\xab\x85\xcc\xb8"),
-               ENTITY_DEF("egrave", 232, "\xc3\xa8"),
-               ENTITY_DEF("Lcedil", 315, "\xc4\xbb"),
-               ENTITY_DEF("lharul", 10602, "\xe2\xa5\xaa"),
-               ENTITY_DEF("middot", 183, "\xc2\xb7"),
-               ENTITY_DEF("ggg", 8921, "\xe2\x8b\x99"),
-               ENTITY_DEF("NestedLessLess", 8810, "\xe2\x89\xaa"),
-               ENTITY_DEF("tau", 964, "\xcf\x84"),
-               ENTITY_DEF("setmn", 8726, "\xe2\x88\x96"),
-               ENTITY_DEF("frac78", 8542, "\xe2\x85\x9e"),
-               ENTITY_DEF("para", 182, "\xc2\xb6"),
-               ENTITY_DEF("Rcedil", 342, "\xc5\x96"),
-               ENTITY_DEF("propto", 8733, "\xe2\x88\x9d"),
-               ENTITY_DEF("sqsubset", 8847, "\xe2\x8a\x8f"),
-               ENTITY_DEF("ensp", 8194, "\xe2\x80\x82"),
-               ENTITY_DEF("boxvH", 9578, "\xe2\x95\xaa"),
-               ENTITY_DEF("NotGreaterTilde", 8821, "\xe2\x89\xb5"),
-               ENTITY_DEF("ffllig", 64260, "\xef\xac\x84"),
-               ENTITY_DEF("kcedil", 311, "\xc4\xb7"),
-               ENTITY_DEF("omega", 969, "\xcf\x89"),
-               ENTITY_DEF("sime", 8771, "\xe2\x89\x83"),
-               ENTITY_DEF("LeftTriangleEqual", 8884, "\xe2\x8a\xb4"),
-               ENTITY_DEF("bsemi", 8271, "\xe2\x81\x8f"),
-               ENTITY_DEF("rdquor", 8221, "\xe2\x80\x9d"),
-               ENTITY_DEF("Utilde", 360, "\xc5\xa8"),
-               ENTITY_DEF("bsol", 92, "\x5c"),
-               ENTITY_DEF("risingdotseq", 8787, "\xe2\x89\x93"),
-               ENTITY_DEF("ultri", 9720, "\xe2\x97\xb8"),
-               ENTITY_DEF("rhov", 1009, "\xcf\xb1"),
-               ENTITY_DEF("TildeEqual", 8771, "\xe2\x89\x83"),
-               ENTITY_DEF("jukcy", 1108, "\xd1\x94"),
-               ENTITY_DEF("perp", 8869, "\xe2\x8a\xa5"),
-               ENTITY_DEF("capbrcup", 10825, "\xe2\xa9\x89"),
-               ENTITY_DEF("ltrie", 8884, "\xe2\x8a\xb4"),
-               ENTITY_DEF("LessTilde", 8818, "\xe2\x89\xb2"),
-               ENTITY_DEF("popf", 120161, "\xf0\x9d\x95\xa1"),
-               ENTITY_DEF("dbkarow", 10511, "\xe2\xa4\x8f"),
-               ENTITY_DEF("roang", 10221, "\xe2\x9f\xad"),
-               ENTITY_DEF("brvbar", 166, "\xc2\xa6"),
-               ENTITY_DEF("CenterDot", 183, "\xc2\xb7"),
-               ENTITY_DEF("notindot", 8949, "\xe2\x8b\xb5\xcc\xb8"),
-               ENTITY_DEF("supmult", 10946, "\xe2\xab\x82"),
-               ENTITY_DEF("multimap", 8888, "\xe2\x8a\xb8"),
-               ENTITY_DEF("frac34", 190, "\xc2\xbe"),
-               ENTITY_DEF("mapsto", 8614, "\xe2\x86\xa6"),
-               ENTITY_DEF("flat", 9837, "\xe2\x99\xad"),
-               ENTITY_DEF("updownarrow", 8597, "\xe2\x86\x95"),
-               ENTITY_DEF("gne", 10888, "\xe2\xaa\x88"),
-               ENTITY_DEF("nrarrc", 10547, "\xe2\xa4\xb3\xcc\xb8"),
-               ENTITY_DEF("suphsol", 10185, "\xe2\x9f\x89"),
-               ENTITY_DEF("nGtv", 8811, "\xe2\x89\xab\xcc\xb8"),
-               ENTITY_DEF("hopf", 120153, "\xf0\x9d\x95\x99"),
-               ENTITY_DEF("pointint", 10773, "\xe2\xa8\x95"),
-               ENTITY_DEF("glj", 10916, "\xe2\xaa\xa4"),
-               ENTITY_DEF("LeftDoubleBracket", 10214, "\xe2\x9f\xa6"),
-               ENTITY_DEF("NotSupersetEqual", 8841, "\xe2\x8a\x89"),
-               ENTITY_DEF("dot", 729, "\xcb\x99"),
-               ENTITY_DEF("tbrk", 9140, "\xe2\x8e\xb4"),
-               ENTITY_DEF("LeftUpDownVector", 10577, "\xe2\xa5\x91"),
-               ENTITY_DEF("uml", 168, "\xc2\xa8"),
-               ENTITY_DEF("bbrk", 9141, "\xe2\x8e\xb5"),
-               ENTITY_DEF("nearrow", 8599, "\xe2\x86\x97"),
-               ENTITY_DEF("backsimeq", 8909, "\xe2\x8b\x8d"),
-               ENTITY_DEF("dblac", 733, "\xcb\x9d"),
-               ENTITY_DEF("circleddash", 8861, "\xe2\x8a\x9d"),
-               ENTITY_DEF("ldsh", 8626, "\xe2\x86\xb2"),
-               ENTITY_DEF("sce", 10928, "\xe2\xaa\xb0"),
-               ENTITY_DEF("angst", 197, "\xc3\x85"),
-               ENTITY_DEF("yen", 165, "\xc2\xa5"),
-               ENTITY_DEF("nsupE", 10950, "\xe2\xab\x86\xcc\xb8"),
-               ENTITY_DEF("Uscr", 119984, "\xf0\x9d\x92\xb0"),
-               ENTITY_DEF("subplus", 10943, "\xe2\xaa\xbf"),
-               ENTITY_DEF("nleqq", 8806, "\xe2\x89\xa6\xcc\xb8"),
-               ENTITY_DEF("nprcue", 8928, "\xe2\x8b\xa0"),
-               ENTITY_DEF("Ocirc", 212, "\xc3\x94"),
-               ENTITY_DEF("disin", 8946, "\xe2\x8b\xb2"),
-               ENTITY_DEF("EqualTilde", 8770, "\xe2\x89\x82"),
-               ENTITY_DEF("YUcy", 1070, "\xd0\xae"),
-               ENTITY_DEF("Kscr", 119974, "\xf0\x9d\x92\xa6"),
-               ENTITY_DEF("lg", 8822, "\xe2\x89\xb6"),
-               ENTITY_DEF("nLeftrightarrow", 8654, "\xe2\x87\x8e"),
-               ENTITY_DEF("eplus", 10865, "\xe2\xa9\xb1"),
-               ENTITY_DEF("les", 10877, "\xe2\xa9\xbd"),
-               ENTITY_DEF("sfr", 120112, "\xf0\x9d\x94\xb0"),
-               ENTITY_DEF("HumpDownHump", 8782, "\xe2\x89\x8e"),
-               ENTITY_DEF("Fouriertrf", 8497, "\xe2\x84\xb1"),
-               ENTITY_DEF("Updownarrow", 8661, "\xe2\x87\x95"),
-               ENTITY_DEF("nrarr", 8603, "\xe2\x86\x9b"),
-               ENTITY_DEF("radic", 8730, "\xe2\x88\x9a"),
-               ENTITY_DEF("gnap", 10890, "\xe2\xaa\x8a"),
-               ENTITY_DEF("zeta", 950, "\xce\xb6"),
-               ENTITY_DEF("Qscr", 119980, "\xf0\x9d\x92\xac"),
-               ENTITY_DEF("NotRightTriangleEqual", 8941, "\xe2\x8b\xad"),
-               ENTITY_DEF("nshortmid", 8740, "\xe2\x88\xa4"),
-               ENTITY_DEF("SHCHcy", 1065, "\xd0\xa9"),
-               ENTITY_DEF("piv", 982, "\xcf\x96"),
-               ENTITY_DEF("angmsdaa", 10664, "\xe2\xa6\xa8"),
-               ENTITY_DEF("curlywedge", 8911, "\xe2\x8b\x8f"),
-               ENTITY_DEF("sqcaps", 8851, "\xe2\x8a\x93\xef\xb8\x80"),
-               ENTITY_DEF("sum", 8721, "\xe2\x88\x91"),
-               ENTITY_DEF("rarrtl", 8611, "\xe2\x86\xa3"),
-               ENTITY_DEF("gescc", 10921, "\xe2\xaa\xa9"),
-               ENTITY_DEF("sup", 8835, "\xe2\x8a\x83"),
-               ENTITY_DEF("smid", 8739, "\xe2\x88\xa3"),
-               ENTITY_DEF("cularr", 8630, "\xe2\x86\xb6"),
-               ENTITY_DEF("olcross", 10683, "\xe2\xa6\xbb"),
-               ENTITY_DEF("GT", 62, "\x3e"),
-               ENTITY_DEF("scap", 10936, "\xe2\xaa\xb8"),
-               ENTITY_DEF("capcup", 10823, "\xe2\xa9\x87"),
-               ENTITY_DEF("NotSquareSubsetEqual", 8930, "\xe2\x8b\xa2"),
-               ENTITY_DEF("uhblk", 9600, "\xe2\x96\x80"),
-               ENTITY_DEF("latail", 10521, "\xe2\xa4\x99"),
-               ENTITY_DEF("smtes", 10924, "\xe2\xaa\xac\xef\xb8\x80"),
-               ENTITY_DEF("RoundImplies", 10608, "\xe2\xa5\xb0"),
-               ENTITY_DEF("wreath", 8768, "\xe2\x89\x80"),
-               ENTITY_DEF("curlyvee", 8910, "\xe2\x8b\x8e"),
-               ENTITY_DEF("uscr", 120010, "\xf0\x9d\x93\x8a"),
-               ENTITY_DEF("nleftrightarrow", 8622, "\xe2\x86\xae"),
-               ENTITY_DEF("ucy", 1091, "\xd1\x83"),
-               ENTITY_DEF("nvge", 8805, "\xe2\x89\xa5\xe2\x83\x92"),
-               ENTITY_DEF("bnot", 8976, "\xe2\x8c\x90"),
-               ENTITY_DEF("alefsym", 8501, "\xe2\x84\xb5"),
-               ENTITY_DEF("star", 9734, "\xe2\x98\x86"),
-               ENTITY_DEF("boxHd", 9572, "\xe2\x95\xa4"),
-               ENTITY_DEF("vsubnE", 10955, "\xe2\xab\x8b\xef\xb8\x80"),
-               ENTITY_DEF("Popf", 8473, "\xe2\x84\x99"),
-               ENTITY_DEF("simgE", 10912, "\xe2\xaa\xa0"),
-               ENTITY_DEF("upsilon", 965, "\xcf\x85"),
-               ENTITY_DEF("NoBreak", 8288, "\xe2\x81\xa0"),
-               ENTITY_DEF("realine", 8475, "\xe2\x84\x9b"),
-               ENTITY_DEF("frac38", 8540, "\xe2\x85\x9c"),
-               ENTITY_DEF("YAcy", 1071, "\xd0\xaf"),
-               ENTITY_DEF("bnequiv", 8801, "\xe2\x89\xa1\xe2\x83\xa5"),
-               ENTITY_DEF("cudarrr", 10549, "\xe2\xa4\xb5"),
-               ENTITY_DEF("lsime", 10893, "\xe2\xaa\x8d"),
-               ENTITY_DEF("lowbar", 95, "\x5f"),
-               ENTITY_DEF("utdot", 8944, "\xe2\x8b\xb0"),
-               ENTITY_DEF("ReverseElement", 8715, "\xe2\x88\x8b"),
-               ENTITY_DEF("nshortparallel", 8742, "\xe2\x88\xa6"),
-               ENTITY_DEF("DJcy", 1026, "\xd0\x82"),
-               ENTITY_DEF("nsube", 8840, "\xe2\x8a\x88"),
-               ENTITY_DEF("VDash", 8875, "\xe2\x8a\xab"),
-               ENTITY_DEF("Ncaron", 327, "\xc5\x87"),
-               ENTITY_DEF("LeftUpVector", 8639, "\xe2\x86\xbf"),
-               ENTITY_DEF("Kcy", 1050, "\xd0\x9a"),
-               ENTITY_DEF("NotLeftTriangleEqual", 8940, "\xe2\x8b\xac"),
-               ENTITY_DEF("nvHarr", 10500, "\xe2\xa4\x84"),
-               ENTITY_DEF("lotimes", 10804, "\xe2\xa8\xb4"),
-               ENTITY_DEF("RightFloor", 8971, "\xe2\x8c\x8b"),
-               ENTITY_DEF("succ", 8827, "\xe2\x89\xbb"),
-               ENTITY_DEF("Ucy", 1059, "\xd0\xa3"),
-               ENTITY_DEF("darr", 8595, "\xe2\x86\x93"),
-               ENTITY_DEF("lbarr", 10508, "\xe2\xa4\x8c"),
-               ENTITY_DEF("xfr", 120117, "\xf0\x9d\x94\xb5"),
-               ENTITY_DEF("zopf", 120171, "\xf0\x9d\x95\xab"),
-               ENTITY_DEF("Phi", 934, "\xce\xa6"),
-               ENTITY_DEF("ord", 10845, "\xe2\xa9\x9d"),
-               ENTITY_DEF("iinfin", 10716, "\xe2\xa7\x9c"),
-               ENTITY_DEF("Xfr", 120091, "\xf0\x9d\x94\x9b"),
-               ENTITY_DEF("qint", 10764, "\xe2\xa8\x8c"),
-               ENTITY_DEF("Upsilon", 933, "\xce\xa5"),
-               ENTITY_DEF("NotSubset", 8834, "\xe2\x8a\x82\xe2\x83\x92"),
-               ENTITY_DEF("gfr", 120100, "\xf0\x9d\x94\xa4"),
-               ENTITY_DEF("notnivb", 8958, "\xe2\x8b\xbe"),
-               ENTITY_DEF("Afr", 120068, "\xf0\x9d\x94\x84"),
-               ENTITY_DEF("ge", 8805, "\xe2\x89\xa5"),
-               ENTITY_DEF("iexcl", 161, "\xc2\xa1"),
-               ENTITY_DEF("dfr", 120097, "\xf0\x9d\x94\xa1"),
-               ENTITY_DEF("rsaquo", 8250, "\xe2\x80\xba"),
-               ENTITY_DEF("xcap", 8898, "\xe2\x8b\x82"),
-               ENTITY_DEF("Jopf", 120129, "\xf0\x9d\x95\x81"),
-               ENTITY_DEF("Hstrok", 294, "\xc4\xa6"),
-               ENTITY_DEF("ldca", 10550, "\xe2\xa4\xb6"),
-               ENTITY_DEF("lmoust", 9136, "\xe2\x8e\xb0"),
-               ENTITY_DEF("wcirc", 373, "\xc5\xb5"),
-               ENTITY_DEF("DownRightVector", 8641, "\xe2\x87\x81"),
-               ENTITY_DEF("LessFullEqual", 8806, "\xe2\x89\xa6"),
-               ENTITY_DEF("dotsquare", 8865, "\xe2\x8a\xa1"),
-               ENTITY_DEF("zhcy", 1078, "\xd0\xb6"),
-               ENTITY_DEF("mDDot", 8762, "\xe2\x88\xba"),
-               ENTITY_DEF("Prime", 8243, "\xe2\x80\xb3"),
-               ENTITY_DEF("prec", 8826, "\xe2\x89\xba"),
-               ENTITY_DEF("swnwar", 10538, "\xe2\xa4\xaa"),
-               ENTITY_DEF("COPY", 169, "\xc2\xa9"),
-               ENTITY_DEF("cong", 8773, "\xe2\x89\x85"),
-               ENTITY_DEF("sacute", 347, "\xc5\x9b"),
-               ENTITY_DEF("Nopf", 8469, "\xe2\x84\x95"),
-               ENTITY_DEF("it", 8290, "\xe2\x81\xa2"),
-               ENTITY_DEF("SOFTcy", 1068, "\xd0\xac"),
-               ENTITY_DEF("uuarr", 8648, "\xe2\x87\x88"),
-               ENTITY_DEF("iota", 953, "\xce\xb9"),
-               ENTITY_DEF("notinE", 8953, "\xe2\x8b\xb9\xcc\xb8"),
-               ENTITY_DEF("jfr", 120103, "\xf0\x9d\x94\xa7"),
-               ENTITY_DEF("QUOT", 34, "\x22"),
-               ENTITY_DEF("vsupnE", 10956, "\xe2\xab\x8c\xef\xb8\x80"),
-               ENTITY_DEF("igrave", 236, "\xc3\xac"),
-               ENTITY_DEF("bsim", 8765, "\xe2\x88\xbd"),
-               ENTITY_DEF("npreceq", 10927, "\xe2\xaa\xaf\xcc\xb8"),
-               ENTITY_DEF("zcaron", 382, "\xc5\xbe"),
-               ENTITY_DEF("DD", 8517, "\xe2\x85\x85"),
-               ENTITY_DEF("gamma", 947, "\xce\xb3"),
-               ENTITY_DEF("homtht", 8763, "\xe2\x88\xbb"),
-               ENTITY_DEF("NonBreakingSpace", 160, "\xc2\xa0"),
-               ENTITY_DEF("Proportion", 8759, "\xe2\x88\xb7"),
-               ENTITY_DEF("nedot", 8784, "\xe2\x89\x90\xcc\xb8"),
-               ENTITY_DEF("nabla", 8711, "\xe2\x88\x87"),
-               ENTITY_DEF("ac", 8766, "\xe2\x88\xbe"),
-               ENTITY_DEF("nsupe", 8841, "\xe2\x8a\x89"),
-               ENTITY_DEF("ell", 8467, "\xe2\x84\x93"),
-               ENTITY_DEF("boxvR", 9566, "\xe2\x95\x9e"),
-               ENTITY_DEF("LowerRightArrow", 8600, "\xe2\x86\x98"),
-               ENTITY_DEF("boxHu", 9575, "\xe2\x95\xa7"),
-               ENTITY_DEF("lE", 8806, "\xe2\x89\xa6"),
-               ENTITY_DEF("dzigrarr", 10239, "\xe2\x9f\xbf"),
-               ENTITY_DEF("rfloor", 8971, "\xe2\x8c\x8b"),
-               ENTITY_DEF("gneq", 10888, "\xe2\xaa\x88"),
-               ENTITY_DEF("rightleftharpoons", 8652, "\xe2\x87\x8c"),
-               ENTITY_DEF("gtquest", 10876, "\xe2\xa9\xbc"),
-               ENTITY_DEF("searhk", 10533, "\xe2\xa4\xa5"),
-               ENTITY_DEF("gesdoto", 10882, "\xe2\xaa\x82"),
-               ENTITY_DEF("cross", 10007, "\xe2\x9c\x97"),
-               ENTITY_DEF("rdquo", 8221, "\xe2\x80\x9d"),
-               ENTITY_DEF("sqsupset", 8848, "\xe2\x8a\x90"),
-               ENTITY_DEF("divonx", 8903, "\xe2\x8b\x87"),
-               ENTITY_DEF("lat", 10923, "\xe2\xaa\xab"),
-               ENTITY_DEF("rmoustache", 9137, "\xe2\x8e\xb1"),
-               ENTITY_DEF("succapprox", 10936, "\xe2\xaa\xb8"),
-               ENTITY_DEF("nhpar", 10994, "\xe2\xab\xb2"),
-               ENTITY_DEF("sharp", 9839, "\xe2\x99\xaf"),
-               ENTITY_DEF("lrcorner", 8991, "\xe2\x8c\x9f"),
-               ENTITY_DEF("Vscr", 119985, "\xf0\x9d\x92\xb1"),
-               ENTITY_DEF("varsigma", 962, "\xcf\x82"),
-               ENTITY_DEF("bsolb", 10693, "\xe2\xa7\x85"),
-               ENTITY_DEF("cupcap", 10822, "\xe2\xa9\x86"),
-               ENTITY_DEF("leftrightarrow", 8596, "\xe2\x86\x94"),
-               ENTITY_DEF("LeftTee", 8867, "\xe2\x8a\xa3"),
-               ENTITY_DEF("Sqrt", 8730, "\xe2\x88\x9a"),
-               ENTITY_DEF("Odblac", 336, "\xc5\x90"),
-               ENTITY_DEF("ocir", 8858, "\xe2\x8a\x9a"),
-               ENTITY_DEF("eqslantless", 10901, "\xe2\xaa\x95"),
-               ENTITY_DEF("supedot", 10948, "\xe2\xab\x84"),
-               ENTITY_DEF("intercal", 8890, "\xe2\x8a\xba"),
-               ENTITY_DEF("Gbreve", 286, "\xc4\x9e"),
-               ENTITY_DEF("xrArr", 10233, "\xe2\x9f\xb9"),
-               ENTITY_DEF("NotTildeEqual", 8772, "\xe2\x89\x84"),
-               ENTITY_DEF("Bfr", 120069, "\xf0\x9d\x94\x85"),
-               ENTITY_DEF("Iuml", 207, "\xc3\x8f"),
-               ENTITY_DEF("leg", 8922, "\xe2\x8b\x9a"),
-               ENTITY_DEF("boxhU", 9576, "\xe2\x95\xa8"),
-               ENTITY_DEF("Gopf", 120126, "\xf0\x9d\x94\xbe"),
-               ENTITY_DEF("af", 8289, "\xe2\x81\xa1"),
-               ENTITY_DEF("xwedge", 8896, "\xe2\x8b\x80"),
-               ENTITY_DEF("precapprox", 10935, "\xe2\xaa\xb7"),
-               ENTITY_DEF("lcedil", 316, "\xc4\xbc"),
-               ENTITY_DEF("between", 8812, "\xe2\x89\xac"),
-               ENTITY_DEF("Oslash", 216, "\xc3\x98"),
-               ENTITY_DEF("breve", 728, "\xcb\x98"),
-               ENTITY_DEF("caps", 8745, "\xe2\x88\xa9\xef\xb8\x80"),
-               ENTITY_DEF("vangrt", 10652, "\xe2\xa6\x9c"),
-               ENTITY_DEF("lagran", 8466, "\xe2\x84\x92"),
-               ENTITY_DEF("kopf", 120156, "\xf0\x9d\x95\x9c"),
-               ENTITY_DEF("ReverseUpEquilibrium", 10607, "\xe2\xa5\xaf"),
-               ENTITY_DEF("nlsim", 8820, "\xe2\x89\xb4"),
-               ENTITY_DEF("Cap", 8914, "\xe2\x8b\x92"),
-               ENTITY_DEF("angmsdac", 10666, "\xe2\xa6\xaa"),
-               ENTITY_DEF("iocy", 1105, "\xd1\x91"),
-               ENTITY_DEF("seswar", 10537, "\xe2\xa4\xa9"),
-               ENTITY_DEF("dzcy", 1119, "\xd1\x9f"),
-               ENTITY_DEF("nsubset", 8834, "\xe2\x8a\x82\xe2\x83\x92"),
-               ENTITY_DEF("cup", 8746, "\xe2\x88\xaa"),
-               ENTITY_DEF("npar", 8742, "\xe2\x88\xa6"),
-               ENTITY_DEF("late", 10925, "\xe2\xaa\xad"),
-               ENTITY_DEF("plussim", 10790, "\xe2\xa8\xa6"),
-               ENTITY_DEF("Darr", 8609, "\xe2\x86\xa1"),
-               ENTITY_DEF("nexist", 8708, "\xe2\x88\x84"),
-               ENTITY_DEF("cent", 162, "\xc2\xa2"),
-               ENTITY_DEF("khcy", 1093, "\xd1\x85"),
-               ENTITY_DEF("smallsetminus", 8726, "\xe2\x88\x96"),
-               ENTITY_DEF("ycirc", 375, "\xc5\xb7"),
-               ENTITY_DEF("lharu", 8636, "\xe2\x86\xbc"),
-               ENTITY_DEF("upuparrows", 8648, "\xe2\x87\x88"),
-               ENTITY_DEF("sigmaf", 962, "\xcf\x82"),
-               ENTITY_DEF("nltri", 8938, "\xe2\x8b\xaa"),
-               ENTITY_DEF("mstpos", 8766, "\xe2\x88\xbe"),
-               ENTITY_DEF("Zopf", 8484, "\xe2\x84\xa4"),
-               ENTITY_DEF("dwangle", 10662, "\xe2\xa6\xa6"),
-               ENTITY_DEF("bowtie", 8904, "\xe2\x8b\x88"),
-               ENTITY_DEF("Dfr", 120071, "\xf0\x9d\x94\x87"),
-               ENTITY_DEF("iacute", 237, "\xc3\xad"),
-               ENTITY_DEF("njcy", 1114, "\xd1\x9a"),
-               ENTITY_DEF("cfr", 120096, "\xf0\x9d\x94\xa0"),
-               ENTITY_DEF("TripleDot", 8411, "\xe2\x83\x9b"),
-               ENTITY_DEF("Or", 10836, "\xe2\xa9\x94"),
-               ENTITY_DEF("blk34", 9619, "\xe2\x96\x93"),
-               ENTITY_DEF("equiv", 8801, "\xe2\x89\xa1"),
-               ENTITY_DEF("fflig", 64256, "\xef\xac\x80"),
-               ENTITY_DEF("Rang", 10219, "\xe2\x9f\xab"),
-               ENTITY_DEF("Wopf", 120142, "\xf0\x9d\x95\x8e"),
-               ENTITY_DEF("boxUl", 9564, "\xe2\x95\x9c"),
-               ENTITY_DEF("frac12", 189, "\xc2\xbd"),
-               ENTITY_DEF("clubs", 9827, "\xe2\x99\xa3"),
-               ENTITY_DEF("amalg", 10815, "\xe2\xa8\xbf"),
-               ENTITY_DEF("Lang", 10218, "\xe2\x9f\xaa"),
-               ENTITY_DEF("asymp", 8776, "\xe2\x89\x88"),
-               ENTITY_DEF("models", 8871, "\xe2\x8a\xa7"),
-               ENTITY_DEF("emptyset", 8709, "\xe2\x88\x85"),
-               ENTITY_DEF("Tscr", 119983, "\xf0\x9d\x92\xaf"),
-               ENTITY_DEF("nleftarrow", 8602, "\xe2\x86\x9a"),
-               ENTITY_DEF("Omacr", 332, "\xc5\x8c"),
-               ENTITY_DEF("gtrarr", 10616, "\xe2\xa5\xb8"),
-               ENTITY_DEF("excl", 33, "\x21"),
-               ENTITY_DEF("rarrw", 8605, "\xe2\x86\x9d"),
-               ENTITY_DEF("abreve", 259, "\xc4\x83"),
-               ENTITY_DEF("CircleTimes", 8855, "\xe2\x8a\x97"),
-               ENTITY_DEF("aopf", 120146, "\xf0\x9d\x95\x92"),
-               ENTITY_DEF("eqvparsl", 10725, "\xe2\xa7\xa5"),
-               ENTITY_DEF("boxv", 9474, "\xe2\x94\x82"),
-               ENTITY_DEF("SuchThat", 8715, "\xe2\x88\x8b"),
-               ENTITY_DEF("varphi", 981, "\xcf\x95"),
-               ENTITY_DEF("Ropf", 8477, "\xe2\x84\x9d"),
-               ENTITY_DEF("rscr", 120007, "\xf0\x9d\x93\x87"),
-               ENTITY_DEF("Rrightarrow", 8667, "\xe2\x87\x9b"),
-               ENTITY_DEF("equest", 8799, "\xe2\x89\x9f"),
-               ENTITY_DEF("ntilde", 241, "\xc3\xb1"),
-               ENTITY_DEF("Escr", 8496, "\xe2\x84\xb0"),
-               ENTITY_DEF("Lopf", 120131, "\xf0\x9d\x95\x83"),
-               ENTITY_DEF("GreaterGreater", 10914, "\xe2\xaa\xa2"),
-               ENTITY_DEF("pluscir", 10786, "\xe2\xa8\xa2"),
-               ENTITY_DEF("nsupset", 8835, "\xe2\x8a\x83\xe2\x83\x92"),
-               ENTITY_DEF("uArr", 8657, "\xe2\x87\x91"),
-               ENTITY_DEF("nwarhk", 10531, "\xe2\xa4\xa3"),
-               ENTITY_DEF("Ycirc", 374, "\xc5\xb6"),
-               ENTITY_DEF("tdot", 8411, "\xe2\x83\x9b"),
-               ENTITY_DEF("circledS", 9416, "\xe2\x93\x88"),
-               ENTITY_DEF("lhard", 8637, "\xe2\x86\xbd"),
-               ENTITY_DEF("iukcy", 1110, "\xd1\x96"),
-               ENTITY_DEF("PrecedesSlantEqual", 8828, "\xe2\x89\xbc"),
-               ENTITY_DEF("Sfr", 120086, "\xf0\x9d\x94\x96"),
-               ENTITY_DEF("egs", 10902, "\xe2\xaa\x96"),
-               ENTITY_DEF("oelig", 339, "\xc5\x93"),
-               ENTITY_DEF("bigtriangledown", 9661, "\xe2\x96\xbd"),
-               ENTITY_DEF("EmptyVerySmallSquare", 9643, "\xe2\x96\xab"),
-               ENTITY_DEF("Backslash", 8726, "\xe2\x88\x96"),
-               ENTITY_DEF("nscr", 120003, "\xf0\x9d\x93\x83"),
-               ENTITY_DEF("uogon", 371, "\xc5\xb3"),
-               ENTITY_DEF("circeq", 8791, "\xe2\x89\x97"),
-               ENTITY_DEF("check", 10003, "\xe2\x9c\x93"),
-               ENTITY_DEF("Sup", 8913, "\xe2\x8b\x91"),
-               ENTITY_DEF("Rcaron", 344, "\xc5\x98"),
-               ENTITY_DEF("lneqq", 8808, "\xe2\x89\xa8"),
-               ENTITY_DEF("lrhar", 8651, "\xe2\x87\x8b"),
-               ENTITY_DEF("ulcorn", 8988, "\xe2\x8c\x9c"),
-               ENTITY_DEF("timesd", 10800, "\xe2\xa8\xb0"),
-               ENTITY_DEF("Sum", 8721, "\xe2\x88\x91"),
-               ENTITY_DEF("varpropto", 8733, "\xe2\x88\x9d"),
-               ENTITY_DEF("Lcaron", 317, "\xc4\xbd"),
-               ENTITY_DEF("lbrkslu", 10637, "\xe2\xa6\x8d"),
-               ENTITY_DEF("AElig", 198, "\xc3\x86"),
-               ENTITY_DEF("varr", 8597, "\xe2\x86\x95"),
-               ENTITY_DEF("nvinfin", 10718, "\xe2\xa7\x9e"),
-               ENTITY_DEF("leq", 8804, "\xe2\x89\xa4"),
-               ENTITY_DEF("biguplus", 10756, "\xe2\xa8\x84"),
-               ENTITY_DEF("rpar", 41, "\x29"),
-               ENTITY_DEF("eng", 331, "\xc5\x8b"),
-               ENTITY_DEF("NegativeThinSpace", 8203, "\xe2\x80\x8b"),
-               ENTITY_DEF("lesssim", 8818, "\xe2\x89\xb2"),
-               ENTITY_DEF("lBarr", 10510, "\xe2\xa4\x8e"),
-               ENTITY_DEF("LeftUpTeeVector", 10592, "\xe2\xa5\xa0"),
-               ENTITY_DEF("gnE", 8809, "\xe2\x89\xa9"),
-               ENTITY_DEF("efr", 120098, "\xf0\x9d\x94\xa2"),
-               ENTITY_DEF("barvee", 8893, "\xe2\x8a\xbd"),
-               ENTITY_DEF("ee", 8519, "\xe2\x85\x87"),
-               ENTITY_DEF("Uogon", 370, "\xc5\xb2"),
-               ENTITY_DEF("gnapprox", 10890, "\xe2\xaa\x8a"),
-               ENTITY_DEF("olcir", 10686, "\xe2\xa6\xbe"),
-               ENTITY_DEF("boxUL", 9565, "\xe2\x95\x9d"),
-               ENTITY_DEF("Gg", 8921, "\xe2\x8b\x99"),
-               ENTITY_DEF("CloseCurlyQuote", 8217, "\xe2\x80\x99"),
-               ENTITY_DEF("leftharpoondown", 8637, "\xe2\x86\xbd"),
-               ENTITY_DEF("vfr", 120115, "\xf0\x9d\x94\xb3"),
-               ENTITY_DEF("gvertneqq", 8809, "\xe2\x89\xa9\xef\xb8\x80"),
-               ENTITY_DEF("ouml", 246, "\xc3\xb6"),
-               ENTITY_DEF("raemptyv", 10675, "\xe2\xa6\xb3"),
-               ENTITY_DEF("Zcaron", 381, "\xc5\xbd"),
-               ENTITY_DEF("scE", 10932, "\xe2\xaa\xb4"),
-               ENTITY_DEF("boxvh", 9532, "\xe2\x94\xbc"),
-               ENTITY_DEF("ominus", 8854, "\xe2\x8a\x96"),
-               ENTITY_DEF("oopf", 120160, "\xf0\x9d\x95\xa0"),
-               ENTITY_DEF("nsucceq", 10928, "\xe2\xaa\xb0\xcc\xb8"),
-               ENTITY_DEF("RBarr", 10512, "\xe2\xa4\x90"),
-               ENTITY_DEF("iprod", 10812, "\xe2\xa8\xbc"),
-               ENTITY_DEF("lvnE", 8808, "\xe2\x89\xa8\xef\xb8\x80"),
-               ENTITY_DEF("andand", 10837, "\xe2\xa9\x95"),
-               ENTITY_DEF("upharpoonright", 8638, "\xe2\x86\xbe"),
-               ENTITY_DEF("ncongdot", 10861, "\xe2\xa9\xad\xcc\xb8"),
-               ENTITY_DEF("drcrop", 8972, "\xe2\x8c\x8c"),
-               ENTITY_DEF("nsimeq", 8772, "\xe2\x89\x84"),
-               ENTITY_DEF("subsub", 10965, "\xe2\xab\x95"),
-               ENTITY_DEF("hardcy", 1098, "\xd1\x8a"),
-               ENTITY_DEF("leqslant", 10877, "\xe2\xa9\xbd"),
-               ENTITY_DEF("uharl", 8639, "\xe2\x86\xbf"),
-               ENTITY_DEF("expectation", 8496, "\xe2\x84\xb0"),
-               ENTITY_DEF("mdash", 8212, "\xe2\x80\x94"),
-               ENTITY_DEF("VerticalTilde", 8768, "\xe2\x89\x80"),
-               ENTITY_DEF("rdldhar", 10601, "\xe2\xa5\xa9"),
-               ENTITY_DEF("leftharpoonup", 8636, "\xe2\x86\xbc"),
-               ENTITY_DEF("mu", 956, "\xce\xbc"),
-               ENTITY_DEF("curarrm", 10556, "\xe2\xa4\xbc"),
-               ENTITY_DEF("Cdot", 266, "\xc4\x8a"),
-               ENTITY_DEF("NotTildeTilde", 8777, "\xe2\x89\x89"),
-               ENTITY_DEF("boxul", 9496, "\xe2\x94\x98"),
-               ENTITY_DEF("planckh", 8462, "\xe2\x84\x8e"),
-               ENTITY_DEF("CapitalDifferentialD", 8517, "\xe2\x85\x85"),
-               ENTITY_DEF("boxDL", 9559, "\xe2\x95\x97"),
-               ENTITY_DEF("cupbrcap", 10824, "\xe2\xa9\x88"),
-               ENTITY_DEF("boxdL", 9557, "\xe2\x95\x95"),
-               ENTITY_DEF("supe", 8839, "\xe2\x8a\x87"),
-               ENTITY_DEF("nvlt", 60, "\x3c\xe2\x83\x92"),
-               ENTITY_DEF("par", 8741, "\xe2\x88\xa5"),
-               ENTITY_DEF("InvisibleComma", 8291, "\xe2\x81\xa3"),
-               ENTITY_DEF("ring", 730, "\xcb\x9a"),
-               ENTITY_DEF("nvap", 8781, "\xe2\x89\x8d\xe2\x83\x92"),
-               ENTITY_DEF("veeeq", 8794, "\xe2\x89\x9a"),
-               ENTITY_DEF("Hfr", 8460, "\xe2\x84\x8c"),
-               ENTITY_DEF("dstrok", 273, "\xc4\x91"),
-               ENTITY_DEF("gesles", 10900, "\xe2\xaa\x94"),
-               ENTITY_DEF("dash", 8208, "\xe2\x80\x90"),
-               ENTITY_DEF("SHcy", 1064, "\xd0\xa8"),
-               ENTITY_DEF("congdot", 10861, "\xe2\xa9\xad"),
-               ENTITY_DEF("imagline", 8464, "\xe2\x84\x90"),
-               ENTITY_DEF("ncy", 1085, "\xd0\xbd"),
-               ENTITY_DEF("bigstar", 9733, "\xe2\x98\x85"),
-               ENTITY_DEF("REG", 174, "\xc2\xae"),
-               ENTITY_DEF("triangleq", 8796, "\xe2\x89\x9c"),
-               ENTITY_DEF("rsqb", 93, "\x5d"),
-               ENTITY_DEF("ddarr", 8650, "\xe2\x87\x8a"),
-               ENTITY_DEF("csub", 10959, "\xe2\xab\x8f"),
-               ENTITY_DEF("quest", 63, "\x3f"),
-               ENTITY_DEF("Star", 8902, "\xe2\x8b\x86"),
-               ENTITY_DEF("LT", 60, "\x3c"),
-               ENTITY_DEF("ncong", 8775, "\xe2\x89\x87"),
-               ENTITY_DEF("prnE", 10933, "\xe2\xaa\xb5"),
-               ENTITY_DEF("bigtriangleup", 9651, "\xe2\x96\xb3"),
-               ENTITY_DEF("Tilde", 8764, "\xe2\x88\xbc"),
-               ENTITY_DEF("ltrif", 9666, "\xe2\x97\x82"),
-               ENTITY_DEF("ldrdhar", 10599, "\xe2\xa5\xa7"),
-               ENTITY_DEF("lcaron", 318, "\xc4\xbe"),
-               ENTITY_DEF("equivDD", 10872, "\xe2\xa9\xb8"),
-               ENTITY_DEF("lHar", 10594, "\xe2\xa5\xa2"),
-               ENTITY_DEF("vBar", 10984, "\xe2\xab\xa8"),
-               ENTITY_DEF("Mopf", 120132, "\xf0\x9d\x95\x84"),
-               ENTITY_DEF("LeftArrow", 8592, "\xe2\x86\x90"),
-               ENTITY_DEF("Rho", 929, "\xce\xa1"),
-               ENTITY_DEF("Ccirc", 264, "\xc4\x88"),
-               ENTITY_DEF("ifr", 120102, "\xf0\x9d\x94\xa6"),
-               ENTITY_DEF("cacute", 263, "\xc4\x87"),
-               ENTITY_DEF("centerdot", 183, "\xc2\xb7"),
-               ENTITY_DEF("dollar", 36, "\x24"),
-               ENTITY_DEF("lang", 10216, "\xe2\x9f\xa8"),
-               ENTITY_DEF("curvearrowright", 8631, "\xe2\x86\xb7"),
-               ENTITY_DEF("half", 189, "\xc2\xbd"),
-               ENTITY_DEF("Ecy", 1069, "\xd0\xad"),
-               ENTITY_DEF("rcub", 125, "\x7d"),
-               ENTITY_DEF("rcy", 1088, "\xd1\x80"),
-               ENTITY_DEF("isins", 8948, "\xe2\x8b\xb4"),
-               ENTITY_DEF("bsolhsub", 10184, "\xe2\x9f\x88"),
-               ENTITY_DEF("boxuL", 9563, "\xe2\x95\x9b"),
-               ENTITY_DEF("shchcy", 1097, "\xd1\x89"),
-               ENTITY_DEF("cwconint", 8754, "\xe2\x88\xb2"),
-               ENTITY_DEF("euro", 8364, "\xe2\x82\xac"),
-               ENTITY_DEF("lesseqqgtr", 10891, "\xe2\xaa\x8b"),
-               ENTITY_DEF("sim", 8764, "\xe2\x88\xbc"),
-               ENTITY_DEF("rarrc", 10547, "\xe2\xa4\xb3"),
-               ENTITY_DEF("boxdl", 9488, "\xe2\x94\x90"),
-               ENTITY_DEF("Epsilon", 917, "\xce\x95"),
-               ENTITY_DEF("iiiint", 10764, "\xe2\xa8\x8c"),
-               ENTITY_DEF("Rightarrow", 8658, "\xe2\x87\x92"),
-               ENTITY_DEF("conint", 8750, "\xe2\x88\xae"),
-               ENTITY_DEF("boxDl", 9558, "\xe2\x95\x96"),
-               ENTITY_DEF("kappav", 1008, "\xcf\xb0"),
-               ENTITY_DEF("profsurf", 8979, "\xe2\x8c\x93"),
-               ENTITY_DEF("auml", 228, "\xc3\xa4"),
-               ENTITY_DEF("heartsuit", 9829, "\xe2\x99\xa5"),
-               ENTITY_DEF("eacute", 233, "\xc3\xa9"),
-               ENTITY_DEF("gt", 62, "\x3e"),
-               ENTITY_DEF("Gcedil", 290, "\xc4\xa2"),
-               ENTITY_DEF("easter", 10862, "\xe2\xa9\xae"),
-               ENTITY_DEF("Tcy", 1058, "\xd0\xa2"),
-               ENTITY_DEF("swarrow", 8601, "\xe2\x86\x99"),
-               ENTITY_DEF("lopf", 120157, "\xf0\x9d\x95\x9d"),
-               ENTITY_DEF("Agrave", 192, "\xc3\x80"),
-               ENTITY_DEF("Aring", 197, "\xc3\x85"),
-               ENTITY_DEF("fpartint", 10765, "\xe2\xa8\x8d"),
-               ENTITY_DEF("xoplus", 10753, "\xe2\xa8\x81"),
-               ENTITY_DEF("LeftDownTeeVector", 10593, "\xe2\xa5\xa1"),
-               ENTITY_DEF("int", 8747, "\xe2\x88\xab"),
-               ENTITY_DEF("Zeta", 918, "\xce\x96"),
-               ENTITY_DEF("loz", 9674, "\xe2\x97\x8a"),
-               ENTITY_DEF("ncup", 10818, "\xe2\xa9\x82"),
-               ENTITY_DEF("napE", 10864, "\xe2\xa9\xb0\xcc\xb8"),
-               ENTITY_DEF("csup", 10960, "\xe2\xab\x90"),
-               ENTITY_DEF("Ncedil", 325, "\xc5\x85"),
-               ENTITY_DEF("cuwed", 8911, "\xe2\x8b\x8f"),
-               ENTITY_DEF("Dot", 168, "\xc2\xa8"),
-               ENTITY_DEF("SquareIntersection", 8851, "\xe2\x8a\x93"),
-               ENTITY_DEF("map", 8614, "\xe2\x86\xa6"),
-               ENTITY_DEF("aelig", 230, "\xc3\xa6"),
-               ENTITY_DEF("RightArrow", 8594, "\xe2\x86\x92"),
-               ENTITY_DEF("rightharpoondown", 8641, "\xe2\x87\x81"),
-               ENTITY_DEF("bNot", 10989, "\xe2\xab\xad"),
-               ENTITY_DEF("nsccue", 8929, "\xe2\x8b\xa1"),
-               ENTITY_DEF("zigrarr", 8669, "\xe2\x87\x9d"),
-               ENTITY_DEF("Sacute", 346, "\xc5\x9a"),
-               ENTITY_DEF("orv", 10843, "\xe2\xa9\x9b"),
-               ENTITY_DEF("RightVectorBar", 10579, "\xe2\xa5\x93"),
-               ENTITY_DEF("nrarrw", 8605, "\xe2\x86\x9d\xcc\xb8"),
-               ENTITY_DEF("nbump", 8782, "\xe2\x89\x8e\xcc\xb8"),
-               ENTITY_DEF("iquest", 191, "\xc2\xbf"),
-               ENTITY_DEF("wr", 8768, "\xe2\x89\x80"),
-               ENTITY_DEF("UpArrow", 8593, "\xe2\x86\x91"),
-               ENTITY_DEF("notinva", 8713, "\xe2\x88\x89"),
-               ENTITY_DEF("ddagger", 8225, "\xe2\x80\xa1"),
-               ENTITY_DEF("nLeftarrow", 8653, "\xe2\x87\x8d"),
-               ENTITY_DEF("rbbrk", 10099, "\xe2\x9d\xb3"),
-               ENTITY_DEF("RightTriangle", 8883, "\xe2\x8a\xb3"),
-               ENTITY_DEF("leqq", 8806, "\xe2\x89\xa6"),
-               ENTITY_DEF("Vert", 8214, "\xe2\x80\x96"),
-               ENTITY_DEF("gesl", 8923, "\xe2\x8b\x9b\xef\xb8\x80"),
-               ENTITY_DEF("LeftTeeVector", 10586, "\xe2\xa5\x9a"),
-               ENTITY_DEF("Union", 8899, "\xe2\x8b\x83"),
-               ENTITY_DEF("sc", 8827, "\xe2\x89\xbb"),
-               ENTITY_DEF("ofr", 120108, "\xf0\x9d\x94\xac"),
-               ENTITY_DEF("quatint", 10774, "\xe2\xa8\x96"),
-               ENTITY_DEF("apacir", 10863, "\xe2\xa9\xaf"),
-               ENTITY_DEF("profalar", 9006, "\xe2\x8c\xae"),
-               ENTITY_DEF("subsetneq", 8842, "\xe2\x8a\x8a"),
-               ENTITY_DEF("Vvdash", 8874, "\xe2\x8a\xaa"),
-               ENTITY_DEF("ohbar", 10677, "\xe2\xa6\xb5"),
-               ENTITY_DEF("Gt", 8811, "\xe2\x89\xab"),
-               ENTITY_DEF("exist", 8707, "\xe2\x88\x83"),
-               ENTITY_DEF("gtrapprox", 10886, "\xe2\xaa\x86"),
-               ENTITY_DEF("euml", 235, "\xc3\xab"),
-               ENTITY_DEF("Equilibrium", 8652, "\xe2\x87\x8c"),
-               ENTITY_DEF("aacute", 225, "\xc3\xa1"),
-               ENTITY_DEF("omid", 10678, "\xe2\xa6\xb6"),
-               ENTITY_DEF("loarr", 8701, "\xe2\x87\xbd"),
-               ENTITY_DEF("SucceedsSlantEqual", 8829, "\xe2\x89\xbd"),
-               ENTITY_DEF("angsph", 8738, "\xe2\x88\xa2"),
-               ENTITY_DEF("nsmid", 8740, "\xe2\x88\xa4"),
-               ENTITY_DEF("lsquor", 8218, "\xe2\x80\x9a"),
-               ENTITY_DEF("cemptyv", 10674, "\xe2\xa6\xb2"),
-               ENTITY_DEF("rAarr", 8667, "\xe2\x87\x9b"),
-               ENTITY_DEF("searr", 8600, "\xe2\x86\x98"),
-               ENTITY_DEF("complexes", 8450, "\xe2\x84\x82"),
-               ENTITY_DEF("UnderParenthesis", 9181, "\xe2\x8f\x9d"),
-               ENTITY_DEF("nparsl", 11005, "\xe2\xab\xbd\xe2\x83\xa5"),
-               ENTITY_DEF("Lacute", 313, "\xc4\xb9"),
-               ENTITY_DEF("deg", 176, "\xc2\xb0"),
-               ENTITY_DEF("Racute", 340, "\xc5\x94"),
-               ENTITY_DEF("Verbar", 8214, "\xe2\x80\x96"),
-               ENTITY_DEF("sqcups", 8852, "\xe2\x8a\x94\xef\xb8\x80"),
-               ENTITY_DEF("Hopf", 8461, "\xe2\x84\x8d"),
-               ENTITY_DEF("naturals", 8469, "\xe2\x84\x95"),
-               ENTITY_DEF("Cedilla", 184, "\xc2\xb8"),
-               ENTITY_DEF("exponentiale", 8519, "\xe2\x85\x87"),
-               ENTITY_DEF("vnsup", 8835, "\xe2\x8a\x83\xe2\x83\x92"),
-               ENTITY_DEF("leftrightarrows", 8646, "\xe2\x87\x86"),
-               ENTITY_DEF("Laplacetrf", 8466, "\xe2\x84\x92"),
-               ENTITY_DEF("vartriangleright", 8883, "\xe2\x8a\xb3"),
-               ENTITY_DEF("rtri", 9657, "\xe2\x96\xb9"),
-               ENTITY_DEF("gE", 8807, "\xe2\x89\xa7"),
-               ENTITY_DEF("SmallCircle", 8728, "\xe2\x88\x98"),
-               ENTITY_DEF("diamondsuit", 9830, "\xe2\x99\xa6"),
-               ENTITY_DEF("Otilde", 213, "\xc3\x95"),
-               ENTITY_DEF("lneq", 10887, "\xe2\xaa\x87"),
-               ENTITY_DEF("lesdoto", 10881, "\xe2\xaa\x81"),
-               ENTITY_DEF("ltquest", 10875, "\xe2\xa9\xbb"),
-               ENTITY_DEF("thinsp", 8201, "\xe2\x80\x89"),
-               ENTITY_DEF("barwed", 8965, "\xe2\x8c\x85"),
-               ENTITY_DEF("elsdot", 10903, "\xe2\xaa\x97"),
-               ENTITY_DEF("circ", 710, "\xcb\x86"),
-               ENTITY_DEF("ni", 8715, "\xe2\x88\x8b"),
-               ENTITY_DEF("mlcp", 10971, "\xe2\xab\x9b"),
-               ENTITY_DEF("Vdash", 8873, "\xe2\x8a\xa9"),
-               ENTITY_DEF("ShortRightArrow", 8594, "\xe2\x86\x92"),
-               ENTITY_DEF("upharpoonleft", 8639, "\xe2\x86\xbf"),
-               ENTITY_DEF("UnderBracket", 9141, "\xe2\x8e\xb5"),
-               ENTITY_DEF("rAtail", 10524, "\xe2\xa4\x9c"),
-               ENTITY_DEF("iopf", 120154, "\xf0\x9d\x95\x9a"),
-               ENTITY_DEF("longleftarrow", 10229, "\xe2\x9f\xb5"),
-               ENTITY_DEF("Zacute", 377, "\xc5\xb9"),
-               ENTITY_DEF("duhar", 10607, "\xe2\xa5\xaf"),
-               ENTITY_DEF("Mfr", 120080, "\xf0\x9d\x94\x90"),
-               ENTITY_DEF("prnap", 10937, "\xe2\xaa\xb9"),
-               ENTITY_DEF("eqcirc", 8790, "\xe2\x89\x96"),
-               ENTITY_DEF("rarrlp", 8620, "\xe2\x86\xac"),
-               ENTITY_DEF("le", 8804, "\xe2\x89\xa4"),
-               ENTITY_DEF("Oscr", 119978, "\xf0\x9d\x92\xaa"),
-               ENTITY_DEF("langd", 10641, "\xe2\xa6\x91"),
-               ENTITY_DEF("Ucirc", 219, "\xc3\x9b"),
-               ENTITY_DEF("precnapprox", 10937, "\xe2\xaa\xb9"),
-               ENTITY_DEF("succcurlyeq", 8829, "\xe2\x89\xbd"),
-               ENTITY_DEF("Tau", 932, "\xce\xa4"),
-               ENTITY_DEF("larr", 8592, "\xe2\x86\x90"),
-               ENTITY_DEF("neArr", 8663, "\xe2\x87\x97"),
-               ENTITY_DEF("subsim", 10951, "\xe2\xab\x87"),
-               ENTITY_DEF("DScy", 1029, "\xd0\x85"),
-               ENTITY_DEF("preccurlyeq", 8828, "\xe2\x89\xbc"),
-               ENTITY_DEF("NotLessLess", 8810, "\xe2\x89\xaa\xcc\xb8"),
-               ENTITY_DEF("succnapprox", 10938, "\xe2\xaa\xba"),
-               ENTITY_DEF("prcue", 8828, "\xe2\x89\xbc"),
-               ENTITY_DEF("Downarrow", 8659, "\xe2\x87\x93"),
-               ENTITY_DEF("angmsdah", 10671, "\xe2\xa6\xaf"),
-               ENTITY_DEF("Emacr", 274, "\xc4\x92"),
-               ENTITY_DEF("lsh", 8624, "\xe2\x86\xb0"),
-               ENTITY_DEF("simne", 8774, "\xe2\x89\x86"),
-               ENTITY_DEF("Bumpeq", 8782, "\xe2\x89\x8e"),
-               ENTITY_DEF("RightUpTeeVector", 10588, "\xe2\xa5\x9c"),
-               ENTITY_DEF("Sigma", 931, "\xce\xa3"),
-               ENTITY_DEF("nvltrie", 8884, "\xe2\x8a\xb4\xe2\x83\x92"),
-               ENTITY_DEF("lfr", 120105, "\xf0\x9d\x94\xa9"),
-               ENTITY_DEF("emsp13", 8196, "\xe2\x80\x84"),
-               ENTITY_DEF("parsl", 11005, "\xe2\xab\xbd"),
-               ENTITY_DEF("ucirc", 251, "\xc3\xbb"),
-               ENTITY_DEF("gsiml", 10896, "\xe2\xaa\x90"),
-               ENTITY_DEF("xsqcup", 10758, "\xe2\xa8\x86"),
-               ENTITY_DEF("Omicron", 927, "\xce\x9f"),
-               ENTITY_DEF("gsime", 10894, "\xe2\xaa\x8e"),
-               ENTITY_DEF("circlearrowleft", 8634, "\xe2\x86\xba"),
-               ENTITY_DEF("sqsupe", 8850, "\xe2\x8a\x92"),
-               ENTITY_DEF("supE", 10950, "\xe2\xab\x86"),
-               ENTITY_DEF("dlcrop", 8973, "\xe2\x8c\x8d"),
-               ENTITY_DEF("RightDownTeeVector", 10589, "\xe2\xa5\x9d"),
-               ENTITY_DEF("Colone", 10868, "\xe2\xa9\xb4"),
-               ENTITY_DEF("awconint", 8755, "\xe2\x88\xb3"),
-               ENTITY_DEF("smte", 10924, "\xe2\xaa\xac"),
-               ENTITY_DEF("lEg", 10891, "\xe2\xaa\x8b"),
-               ENTITY_DEF("circledast", 8859, "\xe2\x8a\x9b"),
-               ENTITY_DEF("ecolon", 8789, "\xe2\x89\x95"),
-               ENTITY_DEF("rect", 9645, "\xe2\x96\xad"),
-               ENTITY_DEF("Equal", 10869, "\xe2\xa9\xb5"),
-               ENTITY_DEF("nwnear", 10535, "\xe2\xa4\xa7"),
-               ENTITY_DEF("capdot", 10816, "\xe2\xa9\x80"),
-               ENTITY_DEF("straightphi", 981, "\xcf\x95"),
-               ENTITY_DEF("forkv", 10969, "\xe2\xab\x99"),
-               ENTITY_DEF("ZHcy", 1046, "\xd0\x96"),
-               ENTITY_DEF("Element", 8712, "\xe2\x88\x88"),
-               ENTITY_DEF("rthree", 8908, "\xe2\x8b\x8c"),
-               ENTITY_DEF("vzigzag", 10650, "\xe2\xa6\x9a"),
-               ENTITY_DEF("hybull", 8259, "\xe2\x81\x83"),
-               ENTITY_DEF("intprod", 10812, "\xe2\xa8\xbc"),
-               ENTITY_DEF("HumpEqual", 8783, "\xe2\x89\x8f"),
-               ENTITY_DEF("bigsqcup", 10758, "\xe2\xa8\x86"),
-               ENTITY_DEF("mp", 8723, "\xe2\x88\x93"),
-               ENTITY_DEF("lescc", 10920, "\xe2\xaa\xa8"),
-               ENTITY_DEF("NotPrecedes", 8832, "\xe2\x8a\x80"),
-               ENTITY_DEF("wedge", 8743, "\xe2\x88\xa7"),
-               ENTITY_DEF("Supset", 8913, "\xe2\x8b\x91"),
-               ENTITY_DEF("pm", 177, "\xc2\xb1"),
-               ENTITY_DEF("kfr", 120104, "\xf0\x9d\x94\xa8"),
-               ENTITY_DEF("ufisht", 10622, "\xe2\xa5\xbe"),
-               ENTITY_DEF("ecaron", 283, "\xc4\x9b"),
-               ENTITY_DEF("chcy", 1095, "\xd1\x87"),
-               ENTITY_DEF("Esim", 10867, "\xe2\xa9\xb3"),
-               ENTITY_DEF("fltns", 9649, "\xe2\x96\xb1"),
-               ENTITY_DEF("nsce", 10928, "\xe2\xaa\xb0\xcc\xb8"),
-               ENTITY_DEF("hookrightarrow", 8618, "\xe2\x86\xaa"),
-               ENTITY_DEF("semi", 59, "\x3b"),
-               ENTITY_DEF("ges", 10878, "\xe2\xa9\xbe"),
-               ENTITY_DEF("approxeq", 8778, "\xe2\x89\x8a"),
-               ENTITY_DEF("rarrsim", 10612, "\xe2\xa5\xb4"),
-               ENTITY_DEF("boxhD", 9573, "\xe2\x95\xa5"),
-               ENTITY_DEF("varpi", 982, "\xcf\x96"),
-               ENTITY_DEF("larrb", 8676, "\xe2\x87\xa4"),
-               ENTITY_DEF("copf", 120148, "\xf0\x9d\x95\x94"),
-               ENTITY_DEF("Dopf", 120123, "\xf0\x9d\x94\xbb"),
-               ENTITY_DEF("LeftVector", 8636, "\xe2\x86\xbc"),
-               ENTITY_DEF("iff", 8660, "\xe2\x87\x94"),
-               ENTITY_DEF("lnap", 10889, "\xe2\xaa\x89"),
-               ENTITY_DEF("NotGreaterFullEqual", 8807, "\xe2\x89\xa7\xcc\xb8"),
-               ENTITY_DEF("varrho", 1009, "\xcf\xb1"),
-               ENTITY_DEF("NotSucceeds", 8833, "\xe2\x8a\x81"),
-               ENTITY_DEF("ltrPar", 10646, "\xe2\xa6\x96"),
-               ENTITY_DEF("nlE", 8806, "\xe2\x89\xa6\xcc\xb8"),
-               ENTITY_DEF("Zfr", 8488, "\xe2\x84\xa8"),
-               ENTITY_DEF("LeftArrowBar", 8676, "\xe2\x87\xa4"),
-               ENTITY_DEF("boxplus", 8862, "\xe2\x8a\x9e"),
-               ENTITY_DEF("sqsube", 8849, "\xe2\x8a\x91"),
-               ENTITY_DEF("Re", 8476, "\xe2\x84\x9c"),
-               ENTITY_DEF("Wfr", 120090, "\xf0\x9d\x94\x9a"),
-               ENTITY_DEF("epsi", 949, "\xce\xb5"),
-               ENTITY_DEF("oacute", 243, "\xc3\xb3"),
-               ENTITY_DEF("bdquo", 8222, "\xe2\x80\x9e"),
-               ENTITY_DEF("wscr", 120012, "\xf0\x9d\x93\x8c"),
-               ENTITY_DEF("bullet", 8226, "\xe2\x80\xa2"),
-               ENTITY_DEF("frown", 8994, "\xe2\x8c\xa2"),
-               ENTITY_DEF("siml", 10909, "\xe2\xaa\x9d"),
-               ENTITY_DEF("Rarr", 8608, "\xe2\x86\xa0"),
-               ENTITY_DEF("Scaron", 352, "\xc5\xa0"),
-               ENTITY_DEF("gtreqqless", 10892, "\xe2\xaa\x8c"),
-               ENTITY_DEF("Larr", 8606, "\xe2\x86\x9e"),
-               ENTITY_DEF("notniva", 8716, "\xe2\x88\x8c"),
-               ENTITY_DEF("gg", 8811, "\xe2\x89\xab"),
-               ENTITY_DEF("phmmat", 8499, "\xe2\x84\xb3"),
-               ENTITY_DEF("boxVL", 9571, "\xe2\x95\xa3"),
-               ENTITY_DEF("sigmav", 962, "\xcf\x82"),
-               ENTITY_DEF("order", 8500, "\xe2\x84\xb4"),
-               ENTITY_DEF("subsup", 10963, "\xe2\xab\x93"),
-               ENTITY_DEF("afr", 120094, "\xf0\x9d\x94\x9e"),
-               ENTITY_DEF("lbrace", 123, "\x7b"),
-               ENTITY_DEF("urcorn", 8989, "\xe2\x8c\x9d"),
-               ENTITY_DEF("Im", 8465, "\xe2\x84\x91"),
-               ENTITY_DEF("CounterClockwiseContourIntegral", 8755, "\xe2\x88\xb3"),
-               ENTITY_DEF("lne", 10887, "\xe2\xaa\x87"),
-               ENTITY_DEF("chi", 967, "\xcf\x87"),
-               ENTITY_DEF("cudarrl", 10552, "\xe2\xa4\xb8"),
-               ENTITY_DEF("ang", 8736, "\xe2\x88\xa0"),
-               ENTITY_DEF("isindot", 8949, "\xe2\x8b\xb5"),
-               ENTITY_DEF("Lfr", 120079, "\xf0\x9d\x94\x8f"),
-               ENTITY_DEF("Rsh", 8625, "\xe2\x86\xb1"),
-               ENTITY_DEF("Ocy", 1054, "\xd0\x9e"),
-               ENTITY_DEF("nvrArr", 10499, "\xe2\xa4\x83"),
-               ENTITY_DEF("otimes", 8855, "\xe2\x8a\x97"),
-               ENTITY_DEF("eqslantgtr", 10902, "\xe2\xaa\x96"),
-               ENTITY_DEF("Rfr", 8476, "\xe2\x84\x9c"),
-               ENTITY_DEF("blacktriangleleft", 9666, "\xe2\x97\x82"),
-               ENTITY_DEF("Lsh", 8624, "\xe2\x86\xb0"),
-               ENTITY_DEF("boxvr", 9500, "\xe2\x94\x9c"),
-               ENTITY_DEF("scedil", 351, "\xc5\x9f"),
-               ENTITY_DEF("iuml", 239, "\xc3\xaf"),
-               ENTITY_DEF("NJcy", 1034, "\xd0\x8a"),
-               ENTITY_DEF("Dagger", 8225, "\xe2\x80\xa1"),
-               ENTITY_DEF("rarrap", 10613, "\xe2\xa5\xb5"),
-               ENTITY_DEF("udblac", 369, "\xc5\xb1"),
-               ENTITY_DEF("Sopf", 120138, "\xf0\x9d\x95\x8a"),
-               ENTITY_DEF("scnsim", 8937, "\xe2\x8b\xa9"),
-               ENTITY_DEF("hbar", 8463, "\xe2\x84\x8f"),
-               ENTITY_DEF("frac15", 8533, "\xe2\x85\x95"),
-               ENTITY_DEF("sup3", 179, "\xc2\xb3"),
-               ENTITY_DEF("NegativeThickSpace", 8203, "\xe2\x80\x8b"),
-               ENTITY_DEF("npr", 8832, "\xe2\x8a\x80"),
-               ENTITY_DEF("doteq", 8784, "\xe2\x89\x90"),
-               ENTITY_DEF("subrarr", 10617, "\xe2\xa5\xb9"),
-               ENTITY_DEF("SquareSubset", 8847, "\xe2\x8a\x8f"),
-               ENTITY_DEF("vprop", 8733, "\xe2\x88\x9d"),
-               ENTITY_DEF("OpenCurlyQuote", 8216, "\xe2\x80\x98"),
-               ENTITY_DEF("supseteq", 8839, "\xe2\x8a\x87"),
-               ENTITY_DEF("nRightarrow", 8655, "\xe2\x87\x8f"),
-               ENTITY_DEF("Longleftarrow", 10232, "\xe2\x9f\xb8"),
-               ENTITY_DEF("lsquo", 8216, "\xe2\x80\x98"),
-               ENTITY_DEF("hstrok", 295, "\xc4\xa7"),
-               ENTITY_DEF("NotTilde", 8769, "\xe2\x89\x81"),
-               ENTITY_DEF("ogt", 10689, "\xe2\xa7\x81"),
-               ENTITY_DEF("block", 9608, "\xe2\x96\x88"),
-               ENTITY_DEF("minusd", 8760, "\xe2\x88\xb8"),
-               ENTITY_DEF("esdot", 8784, "\xe2\x89\x90"),
-               ENTITY_DEF("nsim", 8769, "\xe2\x89\x81"),
-               ENTITY_DEF("scsim", 8831, "\xe2\x89\xbf"),
-               ENTITY_DEF("boxVl", 9570, "\xe2\x95\xa2"),
-               ENTITY_DEF("ltimes", 8905, "\xe2\x8b\x89"),
-               ENTITY_DEF("thkap", 8776, "\xe2\x89\x88"),
-               ENTITY_DEF("vnsub", 8834, "\xe2\x8a\x82\xe2\x83\x92"),
-               ENTITY_DEF("thetasym", 977, "\xcf\x91"),
-               ENTITY_DEF("eopf", 120150, "\xf0\x9d\x95\x96"),
-               ENTITY_DEF("image", 8465, "\xe2\x84\x91"),
-               ENTITY_DEF("doteqdot", 8785, "\xe2\x89\x91"),
-               ENTITY_DEF("Udblac", 368, "\xc5\xb0"),
-               ENTITY_DEF("gnsim", 8935, "\xe2\x8b\xa7"),
-               ENTITY_DEF("yicy", 1111, "\xd1\x97"),
-               ENTITY_DEF("vopf", 120167, "\xf0\x9d\x95\xa7"),
-               ENTITY_DEF("DDotrahd", 10513, "\xe2\xa4\x91"),
-               ENTITY_DEF("Iota", 921, "\xce\x99"),
-               ENTITY_DEF("GJcy", 1027, "\xd0\x83"),
-               ENTITY_DEF("rightthreetimes", 8908, "\xe2\x8b\x8c"),
-               ENTITY_DEF("nrtri", 8939, "\xe2\x8b\xab"),
-               ENTITY_DEF("TildeFullEqual", 8773, "\xe2\x89\x85"),
-               ENTITY_DEF("Dcaron", 270, "\xc4\x8e"),
-               ENTITY_DEF("ccaron", 269, "\xc4\x8d"),
-               ENTITY_DEF("lacute", 314, "\xc4\xba"),
-               ENTITY_DEF("VerticalBar", 8739, "\xe2\x88\xa3"),
-               ENTITY_DEF("Igrave", 204, "\xc3\x8c"),
-               ENTITY_DEF("boxH", 9552, "\xe2\x95\x90"),
-               ENTITY_DEF("Pfr", 120083, "\xf0\x9d\x94\x93"),
-               ENTITY_DEF("equals", 61, "\x3d"),
-               ENTITY_DEF("rbrack", 93, "\x5d"),
-               ENTITY_DEF("OverParenthesis", 9180, "\xe2\x8f\x9c"),
-               ENTITY_DEF("in", 8712, "\xe2\x88\x88"),
-               ENTITY_DEF("llcorner", 8990, "\xe2\x8c\x9e"),
-               ENTITY_DEF("mcomma", 10793, "\xe2\xa8\xa9"),
-               ENTITY_DEF("NotGreater", 8815, "\xe2\x89\xaf"),
-               ENTITY_DEF("midcir", 10992, "\xe2\xab\xb0"),
-               ENTITY_DEF("Edot", 278, "\xc4\x96"),
-               ENTITY_DEF("oplus", 8853, "\xe2\x8a\x95"),
-               ENTITY_DEF("geqq", 8807, "\xe2\x89\xa7"),
-               ENTITY_DEF("curvearrowleft", 8630, "\xe2\x86\xb6"),
-               ENTITY_DEF("Poincareplane", 8460, "\xe2\x84\x8c"),
-               ENTITY_DEF("yscr", 120014, "\xf0\x9d\x93\x8e"),
-               ENTITY_DEF("ccaps", 10829, "\xe2\xa9\x8d"),
-               ENTITY_DEF("rpargt", 10644, "\xe2\xa6\x94"),
-               ENTITY_DEF("topfork", 10970, "\xe2\xab\x9a"),
-               ENTITY_DEF("Gamma", 915, "\xce\x93"),
-               ENTITY_DEF("umacr", 363, "\xc5\xab"),
-               ENTITY_DEF("frac13", 8531, "\xe2\x85\x93"),
-               ENTITY_DEF("cirfnint", 10768, "\xe2\xa8\x90"),
-               ENTITY_DEF("xlArr", 10232, "\xe2\x9f\xb8"),
-               ENTITY_DEF("digamma", 989, "\xcf\x9d"),
-               ENTITY_DEF("Hat", 94, "\x5e"),
-               ENTITY_DEF("lates", 10925, "\xe2\xaa\xad\xef\xb8\x80"),
-               ENTITY_DEF("lgE", 10897, "\xe2\xaa\x91"),
-               ENTITY_DEF("commat", 64, "\x40"),
-               ENTITY_DEF("NotPrecedesSlantEqual", 8928, "\xe2\x8b\xa0"),
-               ENTITY_DEF("phone", 9742, "\xe2\x98\x8e"),
-               ENTITY_DEF("Ecirc", 202, "\xc3\x8a"),
-               ENTITY_DEF("lt", 60, "\x3c"),
-               ENTITY_DEF("intcal", 8890, "\xe2\x8a\xba"),
-               ENTITY_DEF("xdtri", 9661, "\xe2\x96\xbd"),
-               ENTITY_DEF("Abreve", 258, "\xc4\x82"),
-               ENTITY_DEF("gopf", 120152, "\xf0\x9d\x95\x98"),
-               ENTITY_DEF("Xopf", 120143, "\xf0\x9d\x95\x8f"),
-               ENTITY_DEF("Iacute", 205, "\xc3\x8d"),
-               ENTITY_DEF("Aopf", 120120, "\xf0\x9d\x94\xb8"),
-               ENTITY_DEF("gbreve", 287, "\xc4\x9f"),
-               ENTITY_DEF("nleq", 8816, "\xe2\x89\xb0"),
-               ENTITY_DEF("xopf", 120169, "\xf0\x9d\x95\xa9"),
-               ENTITY_DEF("SquareSupersetEqual", 8850, "\xe2\x8a\x92"),
-               ENTITY_DEF("NotLessTilde", 8820, "\xe2\x89\xb4"),
-               ENTITY_DEF("SubsetEqual", 8838, "\xe2\x8a\x86"),
-               ENTITY_DEF("Sc", 10940, "\xe2\xaa\xbc"),
-               ENTITY_DEF("sdote", 10854, "\xe2\xa9\xa6"),
-               ENTITY_DEF("loplus", 10797, "\xe2\xa8\xad"),
-               ENTITY_DEF("zfr", 120119, "\xf0\x9d\x94\xb7"),
-               ENTITY_DEF("subseteqq", 10949, "\xe2\xab\x85"),
-               ENTITY_DEF("Vdashl", 10982, "\xe2\xab\xa6"),
-               ENTITY_DEF("integers", 8484, "\xe2\x84\xa4"),
-               ENTITY_DEF("Umacr", 362, "\xc5\xaa"),
-               ENTITY_DEF("dopf", 120149, "\xf0\x9d\x95\x95"),
-               ENTITY_DEF("RightDownVectorBar", 10581, "\xe2\xa5\x95"),
-               ENTITY_DEF("angmsdaf", 10669, "\xe2\xa6\xad"),
-               ENTITY_DEF("Jfr", 120077, "\xf0\x9d\x94\x8d"),
-               ENTITY_DEF("bernou", 8492, "\xe2\x84\xac"),
-               ENTITY_DEF("lceil", 8968, "\xe2\x8c\x88"),
-               ENTITY_DEF("nvsim", 8764, "\xe2\x88\xbc\xe2\x83\x92"),
-               ENTITY_DEF("NotSucceedsSlantEqual", 8929, "\xe2\x8b\xa1"),
-               ENTITY_DEF("hearts", 9829, "\xe2\x99\xa5"),
-               ENTITY_DEF("vee", 8744, "\xe2\x88\xa8"),
-               ENTITY_DEF("LJcy", 1033, "\xd0\x89"),
-               ENTITY_DEF("nlt", 8814, "\xe2\x89\xae"),
-               ENTITY_DEF("because", 8757, "\xe2\x88\xb5"),
-               ENTITY_DEF("hairsp", 8202, "\xe2\x80\x8a"),
-               ENTITY_DEF("comma", 44, "\x2c"),
-               ENTITY_DEF("iecy", 1077, "\xd0\xb5"),
-               ENTITY_DEF("npre", 10927, "\xe2\xaa\xaf\xcc\xb8"),
-               ENTITY_DEF("NotSquareSubset", 8847, "\xe2\x8a\x8f\xcc\xb8"),
-               ENTITY_DEF("mscr", 120002, "\xf0\x9d\x93\x82"),
-               ENTITY_DEF("jopf", 120155, "\xf0\x9d\x95\x9b"),
-               ENTITY_DEF("bumpE", 10926, "\xe2\xaa\xae"),
-               ENTITY_DEF("thicksim", 8764, "\xe2\x88\xbc"),
-               ENTITY_DEF("Nfr", 120081, "\xf0\x9d\x94\x91"),
-               ENTITY_DEF("yucy", 1102, "\xd1\x8e"),
-               ENTITY_DEF("notinvc", 8950, "\xe2\x8b\xb6"),
-               ENTITY_DEF("lstrok", 322, "\xc5\x82"),
-               ENTITY_DEF("robrk", 10215, "\xe2\x9f\xa7"),
-               ENTITY_DEF("LeftTriangleBar", 10703, "\xe2\xa7\x8f"),
-               ENTITY_DEF("hksearow", 10533, "\xe2\xa4\xa5"),
-               ENTITY_DEF("bigcap", 8898, "\xe2\x8b\x82"),
-               ENTITY_DEF("udhar", 10606, "\xe2\xa5\xae"),
-               ENTITY_DEF("Yscr", 119988, "\xf0\x9d\x92\xb4"),
-               ENTITY_DEF("smeparsl", 10724, "\xe2\xa7\xa4"),
-               ENTITY_DEF("NotLess", 8814, "\xe2\x89\xae"),
-               ENTITY_DEF("dcaron", 271, "\xc4\x8f"),
-               ENTITY_DEF("ange", 10660, "\xe2\xa6\xa4"),
-               ENTITY_DEF("dHar", 10597, "\xe2\xa5\xa5"),
-               ENTITY_DEF("UpperRightArrow", 8599, "\xe2\x86\x97"),
-               ENTITY_DEF("trpezium", 9186, "\xe2\x8f\xa2"),
-               ENTITY_DEF("boxminus", 8863, "\xe2\x8a\x9f"),
-               ENTITY_DEF("notni", 8716, "\xe2\x88\x8c"),
-               ENTITY_DEF("dtrif", 9662, "\xe2\x96\xbe"),
-               ENTITY_DEF("nhArr", 8654, "\xe2\x87\x8e"),
-               ENTITY_DEF("larrpl", 10553, "\xe2\xa4\xb9"),
-               ENTITY_DEF("simeq", 8771, "\xe2\x89\x83"),
-               ENTITY_DEF("geqslant", 10878, "\xe2\xa9\xbe"),
-               ENTITY_DEF("RightUpVectorBar", 10580, "\xe2\xa5\x94"),
-               ENTITY_DEF("nsc", 8833, "\xe2\x8a\x81"),
-               ENTITY_DEF("div", 247, "\xc3\xb7"),
-               ENTITY_DEF("orslope", 10839, "\xe2\xa9\x97"),
-               ENTITY_DEF("lparlt", 10643, "\xe2\xa6\x93"),
-               ENTITY_DEF("trie", 8796, "\xe2\x89\x9c"),
-               ENTITY_DEF("cirmid", 10991, "\xe2\xab\xaf"),
-               ENTITY_DEF("wp", 8472, "\xe2\x84\x98"),
-               ENTITY_DEF("dagger", 8224, "\xe2\x80\xa0"),
-               ENTITY_DEF("utri", 9653, "\xe2\x96\xb5"),
-               ENTITY_DEF("supnE", 10956, "\xe2\xab\x8c"),
-               ENTITY_DEF("eg", 10906, "\xe2\xaa\x9a"),
-               ENTITY_DEF("LeftDownVector", 8643, "\xe2\x87\x83"),
-               ENTITY_DEF("NotLessEqual", 8816, "\xe2\x89\xb0"),
-               ENTITY_DEF("Bopf", 120121, "\xf0\x9d\x94\xb9"),
-               ENTITY_DEF("LongLeftRightArrow", 10231, "\xe2\x9f\xb7"),
-               ENTITY_DEF("Gfr", 120074, "\xf0\x9d\x94\x8a"),
-               ENTITY_DEF("sqsubseteq", 8849, "\xe2\x8a\x91"),
-               ENTITY_DEF("ograve", 242, "\xc3\xb2"),
-               ENTITY_DEF("larrhk", 8617, "\xe2\x86\xa9"),
-               ENTITY_DEF("sigma", 963, "\xcf\x83"),
-               ENTITY_DEF("NotSquareSupersetEqual", 8931, "\xe2\x8b\xa3"),
-               ENTITY_DEF("gvnE", 8809, "\xe2\x89\xa9\xef\xb8\x80"),
-               ENTITY_DEF("timesbar", 10801, "\xe2\xa8\xb1"),
-               ENTITY_DEF("Iukcy", 1030, "\xd0\x86"),
-               ENTITY_DEF("bscr", 119991, "\xf0\x9d\x92\xb7"),
-               ENTITY_DEF("Exists", 8707, "\xe2\x88\x83"),
-               ENTITY_DEF("tscr", 120009, "\xf0\x9d\x93\x89"),
-               ENTITY_DEF("tcy", 1090, "\xd1\x82"),
-               ENTITY_DEF("nwarr", 8598, "\xe2\x86\x96"),
-               ENTITY_DEF("hoarr", 8703, "\xe2\x87\xbf"),
-               ENTITY_DEF("lnapprox", 10889, "\xe2\xaa\x89"),
-               ENTITY_DEF("nu", 957, "\xce\xbd"),
-               ENTITY_DEF("bcy", 1073, "\xd0\xb1"),
-               ENTITY_DEF("ndash", 8211, "\xe2\x80\x93"),
-               ENTITY_DEF("smt", 10922, "\xe2\xaa\xaa"),
-               ENTITY_DEF("scaron", 353, "\xc5\xa1"),
-               ENTITY_DEF("IOcy", 1025, "\xd0\x81"),
-               ENTITY_DEF("Ifr", 8465, "\xe2\x84\x91"),
-               ENTITY_DEF("cularrp", 10557, "\xe2\xa4\xbd"),
-               ENTITY_DEF("lvertneqq", 8808, "\xe2\x89\xa8\xef\xb8\x80"),
-               ENTITY_DEF("nlarr", 8602, "\xe2\x86\x9a"),
-               ENTITY_DEF("colon", 58, "\x3a"),
-               ENTITY_DEF("ddotseq", 10871, "\xe2\xa9\xb7"),
-               ENTITY_DEF("zacute", 378, "\xc5\xba"),
-               ENTITY_DEF("DoubleVerticalBar", 8741, "\xe2\x88\xa5"),
-               ENTITY_DEF("larrfs", 10525, "\xe2\xa4\x9d"),
-               ENTITY_DEF("NotExists", 8708, "\xe2\x88\x84"),
-               ENTITY_DEF("geq", 8805, "\xe2\x89\xa5"),
-               ENTITY_DEF("Ffr", 120073, "\xf0\x9d\x94\x89"),
-               ENTITY_DEF("divide", 247, "\xc3\xb7"),
-               ENTITY_DEF("blank", 9251, "\xe2\x90\xa3"),
-               ENTITY_DEF("IEcy", 1045, "\xd0\x95"),
-               ENTITY_DEF("ordm", 186, "\xc2\xba"),
-               ENTITY_DEF("fopf", 120151, "\xf0\x9d\x95\x97"),
-               ENTITY_DEF("ecir", 8790, "\xe2\x89\x96"),
-               ENTITY_DEF("complement", 8705, "\xe2\x88\x81"),
-               ENTITY_DEF("top", 8868, "\xe2\x8a\xa4"),
-               ENTITY_DEF("DoubleContourIntegral", 8751, "\xe2\x88\xaf"),
-               ENTITY_DEF("nisd", 8954, "\xe2\x8b\xba"),
-               ENTITY_DEF("bcong", 8780, "\xe2\x89\x8c"),
-               ENTITY_DEF("plusdu", 10789, "\xe2\xa8\xa5"),
-               ENTITY_DEF("TildeTilde", 8776, "\xe2\x89\x88"),
-               ENTITY_DEF("lnE", 8808, "\xe2\x89\xa8"),
-               ENTITY_DEF("DoubleLongRightArrow", 10233, "\xe2\x9f\xb9"),
-               ENTITY_DEF("nsubseteqq", 10949, "\xe2\xab\x85\xcc\xb8"),
-               ENTITY_DEF("DownTeeArrow", 8615, "\xe2\x86\xa7"),
-               ENTITY_DEF("Cscr", 119966, "\xf0\x9d\x92\x9e"),
-               ENTITY_DEF("NegativeVeryThinSpace", 8203, "\xe2\x80\x8b"),
-               ENTITY_DEF("emsp", 8195, "\xe2\x80\x83"),
-               ENTITY_DEF("vartriangleleft", 8882, "\xe2\x8a\xb2"),
-               ENTITY_DEF("ropar", 10630, "\xe2\xa6\x86"),
-               ENTITY_DEF("checkmark", 10003, "\xe2\x9c\x93"),
-               ENTITY_DEF("Ycy", 1067, "\xd0\xab"),
-               ENTITY_DEF("supset", 8835, "\xe2\x8a\x83"),
-               ENTITY_DEF("gneqq", 8809, "\xe2\x89\xa9"),
-               ENTITY_DEF("Lstrok", 321, "\xc5\x81"),
-               ENTITY_DEF("AMP", 38, "\x26"),
-               ENTITY_DEF("acE", 8766, "\xe2\x88\xbe\xcc\xb3"),
-               ENTITY_DEF("sqsupseteq", 8850, "\xe2\x8a\x92"),
-               ENTITY_DEF("nle", 8816, "\xe2\x89\xb0"),
-               ENTITY_DEF("nesear", 10536, "\xe2\xa4\xa8"),
-               ENTITY_DEF("LeftDownVectorBar", 10585, "\xe2\xa5\x99"),
-               ENTITY_DEF("Integral", 8747, "\xe2\x88\xab"),
-               ENTITY_DEF("Beta", 914, "\xce\x92"),
-               ENTITY_DEF("nvdash", 8876, "\xe2\x8a\xac"),
-               ENTITY_DEF("nges", 10878, "\xe2\xa9\xbe\xcc\xb8"),
-               ENTITY_DEF("demptyv", 10673, "\xe2\xa6\xb1"),
-               ENTITY_DEF("eta", 951, "\xce\xb7"),
-               ENTITY_DEF("GreaterSlantEqual", 10878, "\xe2\xa9\xbe"),
-               ENTITY_DEF("ccedil", 231, "\xc3\xa7"),
-               ENTITY_DEF("pfr", 120109, "\xf0\x9d\x94\xad"),
-               ENTITY_DEF("bbrktbrk", 9142, "\xe2\x8e\xb6"),
-               ENTITY_DEF("mcy", 1084, "\xd0\xbc"),
-               ENTITY_DEF("Not", 10988, "\xe2\xab\xac"),
-               ENTITY_DEF("qscr", 120006, "\xf0\x9d\x93\x86"),
-               ENTITY_DEF("zwj", 8205, "\xe2\x80\x8d"),
-               ENTITY_DEF("ntrianglerighteq", 8941, "\xe2\x8b\xad"),
-               ENTITY_DEF("permil", 8240, "\xe2\x80\xb0"),
-               ENTITY_DEF("squarf", 9642, "\xe2\x96\xaa"),
-               ENTITY_DEF("apos", 39, "\x27"),
-               ENTITY_DEF("lrm", 8206, "\xe2\x80\x8e"),
-               ENTITY_DEF("male", 9794, "\xe2\x99\x82"),
-               ENTITY_DEF("agrave", 224, "\xc3\xa0"),
-               ENTITY_DEF("Lt", 8810, "\xe2\x89\xaa"),
-               ENTITY_DEF("capand", 10820, "\xe2\xa9\x84"),
-               ENTITY_DEF("aring", 229, "\xc3\xa5"),
-               ENTITY_DEF("Jukcy", 1028, "\xd0\x84"),
-               ENTITY_DEF("bumpe", 8783, "\xe2\x89\x8f"),
-               ENTITY_DEF("dd", 8518, "\xe2\x85\x86"),
-               ENTITY_DEF("tscy", 1094, "\xd1\x86"),
-               ENTITY_DEF("oS", 9416, "\xe2\x93\x88"),
-               ENTITY_DEF("succeq", 10928, "\xe2\xaa\xb0"),
-               ENTITY_DEF("xharr", 10231, "\xe2\x9f\xb7"),
-               ENTITY_DEF("pluse", 10866, "\xe2\xa9\xb2"),
-               ENTITY_DEF("rfisht", 10621, "\xe2\xa5\xbd"),
-               ENTITY_DEF("HorizontalLine", 9472, "\xe2\x94\x80"),
-               ENTITY_DEF("DiacriticalAcute", 180, "\xc2\xb4"),
-               ENTITY_DEF("hfr", 120101, "\xf0\x9d\x94\xa5"),
-               ENTITY_DEF("preceq", 10927, "\xe2\xaa\xaf"),
-               ENTITY_DEF("rationals", 8474, "\xe2\x84\x9a"),
-               ENTITY_DEF("Auml", 196, "\xc3\x84"),
-               ENTITY_DEF("LeftRightArrow", 8596, "\xe2\x86\x94"),
-               ENTITY_DEF("blacktriangleright", 9656, "\xe2\x96\xb8"),
-               ENTITY_DEF("dharr", 8642, "\xe2\x87\x82"),
-               ENTITY_DEF("isin", 8712, "\xe2\x88\x88"),
-               ENTITY_DEF("ldrushar", 10571, "\xe2\xa5\x8b"),
-               ENTITY_DEF("squ", 9633, "\xe2\x96\xa1"),
-               ENTITY_DEF("rbrksld", 10638, "\xe2\xa6\x8e"),
-               ENTITY_DEF("bigwedge", 8896, "\xe2\x8b\x80"),
-               ENTITY_DEF("swArr", 8665, "\xe2\x87\x99"),
-               ENTITY_DEF("IJlig", 306, "\xc4\xb2"),
-               ENTITY_DEF("harr", 8596, "\xe2\x86\x94"),
-               ENTITY_DEF("range", 10661, "\xe2\xa6\xa5"),
-               ENTITY_DEF("urtri", 9721, "\xe2\x97\xb9"),
-               ENTITY_DEF("NotVerticalBar", 8740, "\xe2\x88\xa4"),
-               ENTITY_DEF("ic", 8291, "\xe2\x81\xa3"),
-               ENTITY_DEF("solbar", 9023, "\xe2\x8c\xbf"),
-               ENTITY_DEF("approx", 8776, "\xe2\x89\x88"),
-               ENTITY_DEF("SquareSuperset", 8848, "\xe2\x8a\x90"),
-               ENTITY_DEF("numsp", 8199, "\xe2\x80\x87"),
-               ENTITY_DEF("nLt", 8810, "\xe2\x89\xaa\xe2\x83\x92"),
-               ENTITY_DEF("tilde", 732, "\xcb\x9c"),
-               ENTITY_DEF("rlarr", 8644, "\xe2\x87\x84"),
-               ENTITY_DEF("langle", 10216, "\xe2\x9f\xa8"),
-               ENTITY_DEF("nleqslant", 10877, "\xe2\xa9\xbd\xcc\xb8"),
-               ENTITY_DEF("Nacute", 323, "\xc5\x83"),
-               ENTITY_DEF("NotLeftTriangle", 8938, "\xe2\x8b\xaa"),
-               ENTITY_DEF("sopf", 120164, "\xf0\x9d\x95\xa4"),
-               ENTITY_DEF("xmap", 10236, "\xe2\x9f\xbc"),
-               ENTITY_DEF("supne", 8843, "\xe2\x8a\x8b"),
-               ENTITY_DEF("Int", 8748, "\xe2\x88\xac"),
-               ENTITY_DEF("nsupseteqq", 10950, "\xe2\xab\x86\xcc\xb8"),
-               ENTITY_DEF("circlearrowright", 8635, "\xe2\x86\xbb"),
-               ENTITY_DEF("NotCongruent", 8802, "\xe2\x89\xa2"),
-               ENTITY_DEF("Scedil", 350, "\xc5\x9e"),
-               ENTITY_DEF("raquo", 187, "\xc2\xbb"),
-               ENTITY_DEF("ycy", 1099, "\xd1\x8b"),
-               ENTITY_DEF("notinvb", 8951, "\xe2\x8b\xb7"),
-               ENTITY_DEF("andv", 10842, "\xe2\xa9\x9a"),
-               ENTITY_DEF("nap", 8777, "\xe2\x89\x89"),
-               ENTITY_DEF("shcy", 1096, "\xd1\x88"),
-               ENTITY_DEF("ssetmn", 8726, "\xe2\x88\x96"),
-               ENTITY_DEF("downarrow", 8595, "\xe2\x86\x93"),
-               ENTITY_DEF("gesdotol", 10884, "\xe2\xaa\x84"),
-               ENTITY_DEF("Congruent", 8801, "\xe2\x89\xa1"),
-               ENTITY_DEF("pound", 163, "\xc2\xa3"),
-               ENTITY_DEF("ZeroWidthSpace", 8203, "\xe2\x80\x8b"),
-               ENTITY_DEF("rdca", 10551, "\xe2\xa4\xb7"),
-               ENTITY_DEF("rmoust", 9137, "\xe2\x8e\xb1"),
-               ENTITY_DEF("zcy", 1079, "\xd0\xb7"),
-               ENTITY_DEF("Square", 9633, "\xe2\x96\xa1"),
-               ENTITY_DEF("subE", 10949, "\xe2\xab\x85"),
-               ENTITY_DEF("infintie", 10717, "\xe2\xa7\x9d"),
-               ENTITY_DEF("Cayleys", 8493, "\xe2\x84\xad"),
-               ENTITY_DEF("lsaquo", 8249, "\xe2\x80\xb9"),
-               ENTITY_DEF("realpart", 8476, "\xe2\x84\x9c"),
-               ENTITY_DEF("nprec", 8832, "\xe2\x8a\x80"),
-               ENTITY_DEF("RightTriangleBar", 10704, "\xe2\xa7\x90"),
-               ENTITY_DEF("Kopf", 120130, "\xf0\x9d\x95\x82"),
-               ENTITY_DEF("Ubreve", 364, "\xc5\xac"),
-               ENTITY_DEF("Uopf", 120140, "\xf0\x9d\x95\x8c"),
-               ENTITY_DEF("trianglelefteq", 8884, "\xe2\x8a\xb4"),
-               ENTITY_DEF("rotimes", 10805, "\xe2\xa8\xb5"),
-               ENTITY_DEF("qfr", 120110, "\xf0\x9d\x94\xae"),
-               ENTITY_DEF("gtcc", 10919, "\xe2\xaa\xa7"),
-               ENTITY_DEF("fnof", 402, "\xc6\x92"),
-               ENTITY_DEF("tritime", 10811, "\xe2\xa8\xbb"),
-               ENTITY_DEF("andslope", 10840, "\xe2\xa9\x98"),
-               ENTITY_DEF("harrw", 8621, "\xe2\x86\xad"),
-               ENTITY_DEF("NotSquareSuperset", 8848, "\xe2\x8a\x90\xcc\xb8"),
-               ENTITY_DEF("Amacr", 256, "\xc4\x80"),
-               ENTITY_DEF("OpenCurlyDoubleQuote", 8220, "\xe2\x80\x9c"),
-               ENTITY_DEF("thorn", 254, "\xc3\xbe"),
-               ENTITY_DEF("ordf", 170, "\xc2\xaa"),
-               ENTITY_DEF("natur", 9838, "\xe2\x99\xae"),
-               ENTITY_DEF("xi", 958, "\xce\xbe"),
-               ENTITY_DEF("infin", 8734, "\xe2\x88\x9e"),
-               ENTITY_DEF("nspar", 8742, "\xe2\x88\xa6"),
-               ENTITY_DEF("Jcy", 1049, "\xd0\x99"),
-               ENTITY_DEF("DownLeftTeeVector", 10590, "\xe2\xa5\x9e"),
-               ENTITY_DEF("rbarr", 10509, "\xe2\xa4\x8d"),
-               ENTITY_DEF("Xi", 926, "\xce\x9e"),
-               ENTITY_DEF("bull", 8226, "\xe2\x80\xa2"),
-               ENTITY_DEF("cuesc", 8927, "\xe2\x8b\x9f"),
-               ENTITY_DEF("backcong", 8780, "\xe2\x89\x8c"),
-               ENTITY_DEF("frac35", 8535, "\xe2\x85\x97"),
-               ENTITY_DEF("hscr", 119997, "\xf0\x9d\x92\xbd"),
-               ENTITY_DEF("LessEqualGreater", 8922, "\xe2\x8b\x9a"),
-               ENTITY_DEF("Implies", 8658, "\xe2\x87\x92"),
-               ENTITY_DEF("ETH", 208, "\xc3\x90"),
-               ENTITY_DEF("Yacute", 221, "\xc3\x9d"),
-               ENTITY_DEF("shy", 173, "\xc2\xad"),
-               ENTITY_DEF("Rarrtl", 10518, "\xe2\xa4\x96"),
-               ENTITY_DEF("sup1", 185, "\xc2\xb9"),
-               ENTITY_DEF("reals", 8477, "\xe2\x84\x9d"),
-               ENTITY_DEF("blacklozenge", 10731, "\xe2\xa7\xab"),
-               ENTITY_DEF("ncedil", 326, "\xc5\x86"),
-               ENTITY_DEF("Lambda", 923, "\xce\x9b"),
-               ENTITY_DEF("uopf", 120166, "\xf0\x9d\x95\xa6"),
-               ENTITY_DEF("bigodot", 10752, "\xe2\xa8\x80"),
-               ENTITY_DEF("ubreve", 365, "\xc5\xad"),
-               ENTITY_DEF("drbkarow", 10512, "\xe2\xa4\x90"),
-               ENTITY_DEF("els", 10901, "\xe2\xaa\x95"),
-               ENTITY_DEF("shortparallel", 8741, "\xe2\x88\xa5"),
-               ENTITY_DEF("Pcy", 1055, "\xd0\x9f"),
-               ENTITY_DEF("dsol", 10742, "\xe2\xa7\xb6"),
-               ENTITY_DEF("supsim", 10952, "\xe2\xab\x88"),
-               ENTITY_DEF("Longrightarrow", 10233, "\xe2\x9f\xb9"),
-               ENTITY_DEF("ThickSpace", 8287, "\xe2\x81\x9f\xe2\x80\x8a"),
-               ENTITY_DEF("Itilde", 296, "\xc4\xa8"),
-               ENTITY_DEF("nparallel", 8742, "\xe2\x88\xa6"),
-               ENTITY_DEF("And", 10835, "\xe2\xa9\x93"),
-               ENTITY_DEF("boxhd", 9516, "\xe2\x94\xac"),
-               ENTITY_DEF("Dashv", 10980, "\xe2\xab\xa4"),
-               ENTITY_DEF("NotSuperset", 8835, "\xe2\x8a\x83\xe2\x83\x92"),
-               ENTITY_DEF("Eta", 919, "\xce\x97"),
-               ENTITY_DEF("Qopf", 8474, "\xe2\x84\x9a"),
-               ENTITY_DEF("period", 46, "\x2e"),
-               ENTITY_DEF("angmsd", 8737, "\xe2\x88\xa1"),
-               ENTITY_DEF("fllig", 64258, "\xef\xac\x82"),
-               ENTITY_DEF("cuvee", 8910, "\xe2\x8b\x8e"),
-               ENTITY_DEF("wedbar", 10847, "\xe2\xa9\x9f"),
-               ENTITY_DEF("Fscr", 8497, "\xe2\x84\xb1"),
-               ENTITY_DEF("veebar", 8891, "\xe2\x8a\xbb"),
-               ENTITY_DEF("Longleftrightarrow", 10234, "\xe2\x9f\xba"),
-               ENTITY_DEF("reg", 174, "\xc2\xae"),
-               ENTITY_DEF("NegativeMediumSpace", 8203, "\xe2\x80\x8b"),
-               ENTITY_DEF("Upsi", 978, "\xcf\x92"),
-               ENTITY_DEF("Mellintrf", 8499, "\xe2\x84\xb3"),
-               ENTITY_DEF("boxHU", 9577, "\xe2\x95\xa9"),
-               ENTITY_DEF("frac56", 8538, "\xe2\x85\x9a"),
-               ENTITY_DEF("utrif", 9652, "\xe2\x96\xb4"),
-               ENTITY_DEF("LeftTriangle", 8882, "\xe2\x8a\xb2"),
-               ENTITY_DEF("nsime", 8772, "\xe2\x89\x84"),
-               ENTITY_DEF("rcedil", 343, "\xc5\x97"),
-               ENTITY_DEF("aogon", 261, "\xc4\x85"),
-               ENTITY_DEF("uHar", 10595, "\xe2\xa5\xa3"),
-               ENTITY_DEF("ForAll", 8704, "\xe2\x88\x80"),
-               ENTITY_DEF("prE", 10931, "\xe2\xaa\xb3"),
-               ENTITY_DEF("boxV", 9553, "\xe2\x95\x91"),
-               ENTITY_DEF("softcy", 1100, "\xd1\x8c"),
-               ENTITY_DEF("hercon", 8889, "\xe2\x8a\xb9"),
-               ENTITY_DEF("lmoustache", 9136, "\xe2\x8e\xb0"),
-               ENTITY_DEF("Product", 8719, "\xe2\x88\x8f"),
-               ENTITY_DEF("lsimg", 10895, "\xe2\xaa\x8f"),
-               ENTITY_DEF("verbar", 124, "\x7c"),
-               ENTITY_DEF("ofcir", 10687, "\xe2\xa6\xbf"),
-               ENTITY_DEF("curlyeqprec", 8926, "\xe2\x8b\x9e"),
-               ENTITY_DEF("ldquo", 8220, "\xe2\x80\x9c"),
-               ENTITY_DEF("bot", 8869, "\xe2\x8a\xa5"),
-               ENTITY_DEF("Psi", 936, "\xce\xa8"),
-               ENTITY_DEF("OElig", 338, "\xc5\x92"),
-               ENTITY_DEF("DownRightVectorBar", 10583, "\xe2\xa5\x97"),
-               ENTITY_DEF("minusb", 8863, "\xe2\x8a\x9f"),
-               ENTITY_DEF("Iscr", 8464, "\xe2\x84\x90"),
-               ENTITY_DEF("Tcedil", 354, "\xc5\xa2"),
-               ENTITY_DEF("ffilig", 64259, "\xef\xac\x83"),
-               ENTITY_DEF("Gcy", 1043, "\xd0\x93"),
-               ENTITY_DEF("oline", 8254, "\xe2\x80\xbe"),
-               ENTITY_DEF("bottom", 8869, "\xe2\x8a\xa5"),
-               ENTITY_DEF("nVDash", 8879, "\xe2\x8a\xaf"),
-               ENTITY_DEF("lessdot", 8918, "\xe2\x8b\x96"),
-               ENTITY_DEF("cups", 8746, "\xe2\x88\xaa\xef\xb8\x80"),
-               ENTITY_DEF("gla", 10917, "\xe2\xaa\xa5"),
-               ENTITY_DEF("hellip", 8230, "\xe2\x80\xa6"),
-               ENTITY_DEF("hookleftarrow", 8617, "\xe2\x86\xa9"),
-               ENTITY_DEF("Cup", 8915, "\xe2\x8b\x93"),
-               ENTITY_DEF("upsi", 965, "\xcf\x85"),
-               ENTITY_DEF("DownArrowBar", 10515, "\xe2\xa4\x93"),
-               ENTITY_DEF("lowast", 8727, "\xe2\x88\x97"),
-               ENTITY_DEF("profline", 8978, "\xe2\x8c\x92"),
-               ENTITY_DEF("ngsim", 8821, "\xe2\x89\xb5"),
-               ENTITY_DEF("boxhu", 9524, "\xe2\x94\xb4"),
-               ENTITY_DEF("operp", 10681, "\xe2\xa6\xb9"),
-               ENTITY_DEF("cap", 8745, "\xe2\x88\xa9"),
-               ENTITY_DEF("Hcirc", 292, "\xc4\xa4"),
-               ENTITY_DEF("Ncy", 1053, "\xd0\x9d"),
-               ENTITY_DEF("zeetrf", 8488, "\xe2\x84\xa8"),
-               ENTITY_DEF("cuepr", 8926, "\xe2\x8b\x9e"),
-               ENTITY_DEF("supsetneq", 8843, "\xe2\x8a\x8b"),
-               ENTITY_DEF("lfloor", 8970, "\xe2\x8c\x8a"),
-               ENTITY_DEF("ngtr", 8815, "\xe2\x89\xaf"),
-               ENTITY_DEF("ccups", 10828, "\xe2\xa9\x8c"),
-               ENTITY_DEF("pscr", 120005, "\xf0\x9d\x93\x85"),
-               ENTITY_DEF("Cfr", 8493, "\xe2\x84\xad"),
-               ENTITY_DEF("dtri", 9663, "\xe2\x96\xbf"),
-               ENTITY_DEF("icirc", 238, "\xc3\xae"),
-               ENTITY_DEF("leftarrow", 8592, "\xe2\x86\x90"),
-               ENTITY_DEF("vdash", 8866, "\xe2\x8a\xa2"),
-               ENTITY_DEF("leftrightharpoons", 8651, "\xe2\x87\x8b"),
-               ENTITY_DEF("rightrightarrows", 8649, "\xe2\x87\x89"),
-               ENTITY_DEF("strns", 175, "\xc2\xaf"),
-               ENTITY_DEF("intlarhk", 10775, "\xe2\xa8\x97"),
-               ENTITY_DEF("downharpoonright", 8642, "\xe2\x87\x82"),
-               ENTITY_DEF("yacute", 253, "\xc3\xbd"),
-               ENTITY_DEF("boxUr", 9561, "\xe2\x95\x99"),
-               ENTITY_DEF("triangleleft", 9667, "\xe2\x97\x83"),
-               ENTITY_DEF("DiacriticalDot", 729, "\xcb\x99"),
-               ENTITY_DEF("thetav", 977, "\xcf\x91"),
-               ENTITY_DEF("OverBracket", 9140, "\xe2\x8e\xb4"),
-               ENTITY_DEF("PrecedesTilde", 8830, "\xe2\x89\xbe"),
-               ENTITY_DEF("rtrie", 8885, "\xe2\x8a\xb5"),
-               ENTITY_DEF("Scirc", 348, "\xc5\x9c"),
-               ENTITY_DEF("vsupne", 8843, "\xe2\x8a\x8b\xef\xb8\x80"),
-               ENTITY_DEF("OverBrace", 9182, "\xe2\x8f\x9e"),
-               ENTITY_DEF("Yfr", 120092, "\xf0\x9d\x94\x9c"),
-               ENTITY_DEF("scnE", 10934, "\xe2\xaa\xb6"),
-               ENTITY_DEF("simlE", 10911, "\xe2\xaa\x9f"),
-               ENTITY_DEF("Proportional", 8733, "\xe2\x88\x9d"),
-               ENTITY_DEF("edot", 279, "\xc4\x97"),
-               ENTITY_DEF("loang", 10220, "\xe2\x9f\xac"),
-               ENTITY_DEF("gesdot", 10880, "\xe2\xaa\x80"),
-               ENTITY_DEF("DownBreve", 785, "\xcc\x91"),
-               ENTITY_DEF("pcy", 1087, "\xd0\xbf"),
-               ENTITY_DEF("Succeeds", 8827, "\xe2\x89\xbb"),
-               ENTITY_DEF("mfr", 120106, "\xf0\x9d\x94\xaa"),
-               ENTITY_DEF("Leftarrow", 8656, "\xe2\x87\x90"),
-               ENTITY_DEF("boxDr", 9555, "\xe2\x95\x93"),
-               ENTITY_DEF("Nscr", 119977, "\xf0\x9d\x92\xa9"),
-               ENTITY_DEF("diam", 8900, "\xe2\x8b\x84"),
-               ENTITY_DEF("CHcy", 1063, "\xd0\xa7"),
-               ENTITY_DEF("boxdr", 9484, "\xe2\x94\x8c"),
-               ENTITY_DEF("rlm", 8207, "\xe2\x80\x8f"),
-               ENTITY_DEF("Coproduct", 8720, "\xe2\x88\x90"),
-               ENTITY_DEF("RightTeeArrow", 8614, "\xe2\x86\xa6"),
-               ENTITY_DEF("tridot", 9708, "\xe2\x97\xac"),
-               ENTITY_DEF("ldquor", 8222, "\xe2\x80\x9e"),
-               ENTITY_DEF("sol", 47, "\x2f"),
-               ENTITY_DEF("ecirc", 234, "\xc3\xaa"),
-               ENTITY_DEF("DoubleLeftArrow", 8656, "\xe2\x87\x90"),
-               ENTITY_DEF("Gscr", 119970, "\xf0\x9d\x92\xa2"),
-               ENTITY_DEF("ap", 8776, "\xe2\x89\x88"),
-               ENTITY_DEF("rbrke", 10636, "\xe2\xa6\x8c"),
-               ENTITY_DEF("LeftFloor", 8970, "\xe2\x8c\x8a"),
-               ENTITY_DEF("blk12", 9618, "\xe2\x96\x92"),
-               ENTITY_DEF("Conint", 8751, "\xe2\x88\xaf"),
-               ENTITY_DEF("triangledown", 9663, "\xe2\x96\xbf"),
-               ENTITY_DEF("Icy", 1048, "\xd0\x98"),
-               ENTITY_DEF("backprime", 8245, "\xe2\x80\xb5"),
-               ENTITY_DEF("longleftrightarrow", 10231, "\xe2\x9f\xb7"),
-               ENTITY_DEF("ntriangleleft", 8938, "\xe2\x8b\xaa"),
-               ENTITY_DEF("copy", 169, "\xc2\xa9"),
-               ENTITY_DEF("mapstodown", 8615, "\xe2\x86\xa7"),
-               ENTITY_DEF("seArr", 8664, "\xe2\x87\x98"),
-               ENTITY_DEF("ENG", 330, "\xc5\x8a"),
-               ENTITY_DEF("DoubleRightArrow", 8658, "\xe2\x87\x92"),
-               ENTITY_DEF("tfr", 120113, "\xf0\x9d\x94\xb1"),
-               ENTITY_DEF("rharul", 10604, "\xe2\xa5\xac"),
-               ENTITY_DEF("bfr", 120095, "\xf0\x9d\x94\x9f"),
-               ENTITY_DEF("origof", 8886, "\xe2\x8a\xb6"),
-               ENTITY_DEF("Therefore", 8756, "\xe2\x88\xb4"),
-               ENTITY_DEF("glE", 10898, "\xe2\xaa\x92"),
-               ENTITY_DEF("leftarrowtail", 8610, "\xe2\x86\xa2"),
-               ENTITY_DEF("NotEqual", 8800, "\xe2\x89\xa0"),
-               ENTITY_DEF("LeftCeiling", 8968, "\xe2\x8c\x88"),
-               ENTITY_DEF("lArr", 8656, "\xe2\x87\x90"),
-               ENTITY_DEF("subseteq", 8838, "\xe2\x8a\x86"),
-               ENTITY_DEF("larrbfs", 10527, "\xe2\xa4\x9f"),
-               ENTITY_DEF("Gammad", 988, "\xcf\x9c"),
-               ENTITY_DEF("rtriltri", 10702, "\xe2\xa7\x8e"),
-               ENTITY_DEF("Fcy", 1060, "\xd0\xa4"),
-               ENTITY_DEF("Vopf", 120141, "\xf0\x9d\x95\x8d"),
-               ENTITY_DEF("lrarr", 8646, "\xe2\x87\x86"),
-               ENTITY_DEF("delta", 948, "\xce\xb4"),
-               ENTITY_DEF("xodot", 10752, "\xe2\xa8\x80"),
-               ENTITY_DEF("larrtl", 8610, "\xe2\x86\xa2"),
-               ENTITY_DEF("gsim", 8819, "\xe2\x89\xb3"),
-               ENTITY_DEF("ratail", 10522, "\xe2\xa4\x9a"),
-               ENTITY_DEF("vsubne", 8842, "\xe2\x8a\x8a\xef\xb8\x80"),
-               ENTITY_DEF("boxur", 9492, "\xe2\x94\x94"),
-               ENTITY_DEF("succsim", 8831, "\xe2\x89\xbf"),
-               ENTITY_DEF("triplus", 10809, "\xe2\xa8\xb9"),
-               ENTITY_DEF("nless", 8814, "\xe2\x89\xae"),
-               ENTITY_DEF("uharr", 8638, "\xe2\x86\xbe"),
-               ENTITY_DEF("lambda", 955, "\xce\xbb"),
-               ENTITY_DEF("uuml", 252, "\xc3\xbc"),
-               ENTITY_DEF("horbar", 8213, "\xe2\x80\x95"),
-               ENTITY_DEF("ccirc", 265, "\xc4\x89"),
-               ENTITY_DEF("sqcup", 8852, "\xe2\x8a\x94"),
-               ENTITY_DEF("Pscr", 119979, "\xf0\x9d\x92\xab"),
-               ENTITY_DEF("supsup", 10966, "\xe2\xab\x96"),
-               ENTITY_DEF("Cacute", 262, "\xc4\x86"),
-               ENTITY_DEF("upsih", 978, "\xcf\x92"),
-               ENTITY_DEF("precsim", 8830, "\xe2\x89\xbe"),
-               ENTITY_DEF("longrightarrow", 10230, "\xe2\x9f\xb6"),
-               ENTITY_DEF("circledR", 174, "\xc2\xae"),
-               ENTITY_DEF("UpTeeArrow", 8613, "\xe2\x86\xa5"),
-               ENTITY_DEF("bepsi", 1014, "\xcf\xb6"),
-               ENTITY_DEF("oast", 8859, "\xe2\x8a\x9b"),
-               ENTITY_DEF("yfr", 120118, "\xf0\x9d\x94\xb6"),
-               ENTITY_DEF("rdsh", 8627, "\xe2\x86\xb3"),
-               ENTITY_DEF("Ograve", 210, "\xc3\x92"),
-               ENTITY_DEF("LeftVectorBar", 10578, "\xe2\xa5\x92"),
-               ENTITY_DEF("NotNestedLessLess", 10913, "\xe2\xaa\xa1\xcc\xb8"),
-               ENTITY_DEF("Jscr", 119973, "\xf0\x9d\x92\xa5"),
-               ENTITY_DEF("psi", 968, "\xcf\x88"),
-               ENTITY_DEF("orarr", 8635, "\xe2\x86\xbb"),
-               ENTITY_DEF("Subset", 8912, "\xe2\x8b\x90"),
-               ENTITY_DEF("curarr", 8631, "\xe2\x86\xb7"),
-               ENTITY_DEF("CirclePlus", 8853, "\xe2\x8a\x95"),
-               ENTITY_DEF("gtrless", 8823, "\xe2\x89\xb7"),
-               ENTITY_DEF("nvle", 8804, "\xe2\x89\xa4\xe2\x83\x92"),
-               ENTITY_DEF("prop", 8733, "\xe2\x88\x9d"),
-               ENTITY_DEF("gEl", 10892, "\xe2\xaa\x8c"),
-               ENTITY_DEF("gtlPar", 10645, "\xe2\xa6\x95"),
-               ENTITY_DEF("frasl", 8260, "\xe2\x81\x84"),
-               ENTITY_DEF("nearr", 8599, "\xe2\x86\x97"),
-               ENTITY_DEF("NotSubsetEqual", 8840, "\xe2\x8a\x88"),
-               ENTITY_DEF("planck", 8463, "\xe2\x84\x8f"),
-               ENTITY_DEF("Uuml", 220, "\xc3\x9c"),
-               ENTITY_DEF("spadesuit", 9824, "\xe2\x99\xa0"),
-               ENTITY_DEF("sect", 167, "\xc2\xa7"),
-               ENTITY_DEF("cdot", 267, "\xc4\x8b"),
-               ENTITY_DEF("boxVh", 9579, "\xe2\x95\xab"),
-               ENTITY_DEF("zscr", 120015, "\xf0\x9d\x93\x8f"),
-               ENTITY_DEF("nsqsube", 8930, "\xe2\x8b\xa2"),
-               ENTITY_DEF("grave", 96, "\x60"),
-               ENTITY_DEF("angrtvb", 8894, "\xe2\x8a\xbe"),
-               ENTITY_DEF("MediumSpace", 8287, "\xe2\x81\x9f"),
-               ENTITY_DEF("Ntilde", 209, "\xc3\x91"),
-               ENTITY_DEF("solb", 10692, "\xe2\xa7\x84"),
-               ENTITY_DEF("angzarr", 9084, "\xe2\x8d\xbc"),
-               ENTITY_DEF("nopf", 120159, "\xf0\x9d\x95\x9f"),
-               ENTITY_DEF("rtrif", 9656, "\xe2\x96\xb8"),
-               ENTITY_DEF("nrightarrow", 8603, "\xe2\x86\x9b"),
-               ENTITY_DEF("Kappa", 922, "\xce\x9a"),
-               ENTITY_DEF("simrarr", 10610, "\xe2\xa5\xb2"),
-               ENTITY_DEF("imacr", 299, "\xc4\xab"),
-               ENTITY_DEF("vrtri", 8883, "\xe2\x8a\xb3"),
-               ENTITY_DEF("part", 8706, "\xe2\x88\x82"),
-               ENTITY_DEF("esim", 8770, "\xe2\x89\x82"),
-               ENTITY_DEF("atilde", 227, "\xc3\xa3"),
-               ENTITY_DEF("DownRightTeeVector", 10591, "\xe2\xa5\x9f"),
-               ENTITY_DEF("jcirc", 309, "\xc4\xb5"),
-               ENTITY_DEF("Ecaron", 282, "\xc4\x9a"),
-               ENTITY_DEF("VerticalSeparator", 10072, "\xe2\x9d\x98"),
-               ENTITY_DEF("rHar", 10596, "\xe2\xa5\xa4"),
-               ENTITY_DEF("rcaron", 345, "\xc5\x99"),
-               ENTITY_DEF("subnE", 10955, "\xe2\xab\x8b"),
-               ENTITY_DEF("ii", 8520, "\xe2\x85\x88"),
-               ENTITY_DEF("Cconint", 8752, "\xe2\x88\xb0"),
-               ENTITY_DEF("Mcy", 1052, "\xd0\x9c"),
-               ENTITY_DEF("eqcolon", 8789, "\xe2\x89\x95"),
-               ENTITY_DEF("cupor", 10821, "\xe2\xa9\x85"),
-               ENTITY_DEF("DoubleUpArrow", 8657, "\xe2\x87\x91"),
-               ENTITY_DEF("boxbox", 10697, "\xe2\xa7\x89"),
-               ENTITY_DEF("setminus", 8726, "\xe2\x88\x96"),
-               ENTITY_DEF("Lleftarrow", 8666, "\xe2\x87\x9a"),
-               ENTITY_DEF("nang", 8736, "\xe2\x88\xa0\xe2\x83\x92"),
-               ENTITY_DEF("TRADE", 8482, "\xe2\x84\xa2"),
-               ENTITY_DEF("urcorner", 8989, "\xe2\x8c\x9d"),
-               ENTITY_DEF("lsqb", 91, "\x5b"),
-               ENTITY_DEF("cupcup", 10826, "\xe2\xa9\x8a"),
-               ENTITY_DEF("kjcy", 1116, "\xd1\x9c"),
-               ENTITY_DEF("llhard", 10603, "\xe2\xa5\xab"),
-               ENTITY_DEF("mumap", 8888, "\xe2\x8a\xb8"),
-               ENTITY_DEF("iiint", 8749, "\xe2\x88\xad"),
-               ENTITY_DEF("RightTee", 8866, "\xe2\x8a\xa2"),
-               ENTITY_DEF("Tcaron", 356, "\xc5\xa4"),
-               ENTITY_DEF("bigcirc", 9711, "\xe2\x97\xaf"),
-               ENTITY_DEF("trianglerighteq", 8885, "\xe2\x8a\xb5"),
-               ENTITY_DEF("NotLessGreater", 8824, "\xe2\x89\xb8"),
-               ENTITY_DEF("hArr", 8660, "\xe2\x87\x94"),
-               ENTITY_DEF("ocy", 1086, "\xd0\xbe"),
-               ENTITY_DEF("tosa", 10537, "\xe2\xa4\xa9"),
-               ENTITY_DEF("twixt", 8812, "\xe2\x89\xac"),
-               ENTITY_DEF("square", 9633, "\xe2\x96\xa1"),
-               ENTITY_DEF("Otimes", 10807, "\xe2\xa8\xb7"),
-               ENTITY_DEF("Kcedil", 310, "\xc4\xb6"),
-               ENTITY_DEF("beth", 8502, "\xe2\x84\xb6"),
-               ENTITY_DEF("triminus", 10810, "\xe2\xa8\xba"),
-               ENTITY_DEF("nlArr", 8653, "\xe2\x87\x8d"),
-               ENTITY_DEF("Oacute", 211, "\xc3\x93"),
-               ENTITY_DEF("zwnj", 8204, "\xe2\x80\x8c"),
-               ENTITY_DEF("ll", 8810, "\xe2\x89\xaa"),
-               ENTITY_DEF("smashp", 10803, "\xe2\xa8\xb3"),
-               ENTITY_DEF("ngeqq", 8807, "\xe2\x89\xa7\xcc\xb8"),
-               ENTITY_DEF("rnmid", 10990, "\xe2\xab\xae"),
-               ENTITY_DEF("nwArr", 8662, "\xe2\x87\x96"),
-               ENTITY_DEF("RightUpDownVector", 10575, "\xe2\xa5\x8f"),
-               ENTITY_DEF("lbbrk", 10098, "\xe2\x9d\xb2"),
-               ENTITY_DEF("compfn", 8728, "\xe2\x88\x98"),
-               ENTITY_DEF("eDDot", 10871, "\xe2\xa9\xb7"),
-               ENTITY_DEF("Jsercy", 1032, "\xd0\x88"),
-               ENTITY_DEF("HARDcy", 1066, "\xd0\xaa"),
-               ENTITY_DEF("nexists", 8708, "\xe2\x88\x84"),
-               ENTITY_DEF("theta", 952, "\xce\xb8"),
-               ENTITY_DEF("plankv", 8463, "\xe2\x84\x8f"),
-               ENTITY_DEF("sup2", 178, "\xc2\xb2"),
-               ENTITY_DEF("lessapprox", 10885, "\xe2\xaa\x85"),
-               ENTITY_DEF("gdot", 289, "\xc4\xa1"),
-               ENTITY_DEF("angmsdae", 10668, "\xe2\xa6\xac"),
-               ENTITY_DEF("Superset", 8835, "\xe2\x8a\x83"),
-               ENTITY_DEF("prap", 10935, "\xe2\xaa\xb7"),
-               ENTITY_DEF("Zscr", 119989, "\xf0\x9d\x92\xb5"),
-               ENTITY_DEF("nsucc", 8833, "\xe2\x8a\x81"),
-               ENTITY_DEF("supseteqq", 10950, "\xe2\xab\x86"),
-               ENTITY_DEF("UpTee", 8869, "\xe2\x8a\xa5"),
-               ENTITY_DEF("LowerLeftArrow", 8601, "\xe2\x86\x99"),
-               ENTITY_DEF("ssmile", 8995, "\xe2\x8c\xa3"),
-               ENTITY_DEF("niv", 8715, "\xe2\x88\x8b"),
-               ENTITY_DEF("bigvee", 8897, "\xe2\x8b\x81"),
-               ENTITY_DEF("kscr", 120000, "\xf0\x9d\x93\x80"),
-               ENTITY_DEF("xutri", 9651, "\xe2\x96\xb3"),
-               ENTITY_DEF("caret", 8257, "\xe2\x81\x81"),
-               ENTITY_DEF("caron", 711, "\xcb\x87"),
-               ENTITY_DEF("Wedge", 8896, "\xe2\x8b\x80"),
-               ENTITY_DEF("sdotb", 8865, "\xe2\x8a\xa1"),
-               ENTITY_DEF("bigoplus", 10753, "\xe2\xa8\x81"),
-               ENTITY_DEF("Breve", 728, "\xcb\x98"),
-               ENTITY_DEF("ImaginaryI", 8520, "\xe2\x85\x88"),
-               ENTITY_DEF("longmapsto", 10236, "\xe2\x9f\xbc"),
-               ENTITY_DEF("boxVH", 9580, "\xe2\x95\xac"),
-               ENTITY_DEF("lozenge", 9674, "\xe2\x97\x8a"),
-               ENTITY_DEF("toea", 10536, "\xe2\xa4\xa8"),
-               ENTITY_DEF("nbumpe", 8783, "\xe2\x89\x8f\xcc\xb8"),
-               ENTITY_DEF("gcirc", 285, "\xc4\x9d"),
-               ENTITY_DEF("NotHumpEqual", 8783, "\xe2\x89\x8f\xcc\xb8"),
-               ENTITY_DEF("pre", 10927, "\xe2\xaa\xaf"),
-               ENTITY_DEF("ascr", 119990, "\xf0\x9d\x92\xb6"),
-               ENTITY_DEF("Acirc", 194, "\xc3\x82"),
-               ENTITY_DEF("questeq", 8799, "\xe2\x89\x9f"),
-               ENTITY_DEF("ncaron", 328, "\xc5\x88"),
-               ENTITY_DEF("LeftTeeArrow", 8612, "\xe2\x86\xa4"),
-               ENTITY_DEF("xcirc", 9711, "\xe2\x97\xaf"),
-               ENTITY_DEF("swarr", 8601, "\xe2\x86\x99"),
-               ENTITY_DEF("MinusPlus", 8723, "\xe2\x88\x93"),
-               ENTITY_DEF("plus", 43, "\x2b"),
-               ENTITY_DEF("NotDoubleVerticalBar", 8742, "\xe2\x88\xa6"),
-               ENTITY_DEF("rppolint", 10770, "\xe2\xa8\x92"),
-               ENTITY_DEF("NotTildeFullEqual", 8775, "\xe2\x89\x87"),
-               ENTITY_DEF("ltdot", 8918, "\xe2\x8b\x96"),
-               ENTITY_DEF("NotNestedGreaterGreater", 10914, "\xe2\xaa\xa2\xcc\xb8"),
-               ENTITY_DEF("Lscr", 8466, "\xe2\x84\x92"),
-               ENTITY_DEF("pitchfork", 8916, "\xe2\x8b\x94"),
-               ENTITY_DEF("Eopf", 120124, "\xf0\x9d\x94\xbc"),
-               ENTITY_DEF("ropf", 120163, "\xf0\x9d\x95\xa3"),
-               ENTITY_DEF("Delta", 916, "\xce\x94"),
-               ENTITY_DEF("lozf", 10731, "\xe2\xa7\xab"),
-               ENTITY_DEF("RightTeeVector", 10587, "\xe2\xa5\x9b"),
-               ENTITY_DEF("UpDownArrow", 8597, "\xe2\x86\x95"),
-               ENTITY_DEF("bump", 8782, "\xe2\x89\x8e"),
-               ENTITY_DEF("Rscr", 8475, "\xe2\x84\x9b"),
-               ENTITY_DEF("slarr", 8592, "\xe2\x86\x90"),
-               ENTITY_DEF("lcy", 1083, "\xd0\xbb"),
-               ENTITY_DEF("Vee", 8897, "\xe2\x8b\x81"),
-               ENTITY_DEF("Iogon", 302, "\xc4\xae"),
-               ENTITY_DEF("minus", 8722, "\xe2\x88\x92"),
-               ENTITY_DEF("GreaterFullEqual", 8807, "\xe2\x89\xa7"),
-               ENTITY_DEF("xhArr", 10234, "\xe2\x9f\xba"),
-               ENTITY_DEF("shortmid", 8739, "\xe2\x88\xa3"),
-               ENTITY_DEF("DoubleDownArrow", 8659, "\xe2\x87\x93"),
-               ENTITY_DEF("Wscr", 119986, "\xf0\x9d\x92\xb2"),
-               ENTITY_DEF("rang", 10217, "\xe2\x9f\xa9"),
-               ENTITY_DEF("lcub", 123, "\x7b"),
-               ENTITY_DEF("mnplus", 8723, "\xe2\x88\x93"),
-               ENTITY_DEF("ulcrop", 8975, "\xe2\x8c\x8f"),
-               ENTITY_DEF("wfr", 120116, "\xf0\x9d\x94\xb4"),
-               ENTITY_DEF("DifferentialD", 8518, "\xe2\x85\x86"),
-               ENTITY_DEF("ThinSpace", 8201, "\xe2\x80\x89"),
-               ENTITY_DEF("NotGreaterGreater", 8811, "\xe2\x89\xab\xcc\xb8"),
-               ENTITY_DEF("Topf", 120139, "\xf0\x9d\x95\x8b"),
-               ENTITY_DEF("sbquo", 8218, "\xe2\x80\x9a"),
-               ENTITY_DEF("sdot", 8901, "\xe2\x8b\x85"),
-               ENTITY_DEF("DoubleLeftTee", 10980, "\xe2\xab\xa4"),
-               ENTITY_DEF("vBarv", 10985, "\xe2\xab\xa9"),
-               ENTITY_DEF("subne", 8842, "\xe2\x8a\x8a"),
-               ENTITY_DEF("gtrdot", 8919, "\xe2\x8b\x97"),
-               ENTITY_DEF("opar", 10679, "\xe2\xa6\xb7"),
-               ENTITY_DEF("apid", 8779, "\xe2\x89\x8b"),
-               ENTITY_DEF("Cross", 10799, "\xe2\xa8\xaf"),
-               ENTITY_DEF("lhblk", 9604, "\xe2\x96\x84"),
-               ENTITY_DEF("capcap", 10827, "\xe2\xa9\x8b"),
-               ENTITY_DEF("midast", 42, "\x2a"),
-               ENTITY_DEF("lscr", 120001, "\xf0\x9d\x93\x81"),
-               ENTITY_DEF("nGt", 8811, "\xe2\x89\xab\xe2\x83\x92"),
-               ENTITY_DEF("Euml", 203, "\xc3\x8b"),
-               ENTITY_DEF("blacktriangledown", 9662, "\xe2\x96\xbe"),
-               ENTITY_DEF("Rcy", 1056, "\xd0\xa0"),
-               ENTITY_DEF("dfisht", 10623, "\xe2\xa5\xbf"),
-               ENTITY_DEF("dashv", 8867, "\xe2\x8a\xa3"),
-               ENTITY_DEF("ast", 42, "\x2a"),
-               ENTITY_DEF("ContourIntegral", 8750, "\xe2\x88\xae"),
-               ENTITY_DEF("Ofr", 120082, "\xf0\x9d\x94\x92"),
-               ENTITY_DEF("Lcy", 1051, "\xd0\x9b"),
-               ENTITY_DEF("nltrie", 8940, "\xe2\x8b\xac"),
-               ENTITY_DEF("ShortUpArrow", 8593, "\xe2\x86\x91"),
-               ENTITY_DEF("acy", 1072, "\xd0\xb0"),
-               ENTITY_DEF("rightarrow", 8594, "\xe2\x86\x92"),
-               ENTITY_DEF("UnderBar", 95, "\x5f"),
-               ENTITY_DEF("LongLeftArrow", 10229, "\xe2\x9f\xb5"),
-               ENTITY_DEF("andd", 10844, "\xe2\xa9\x9c"),
-               ENTITY_DEF("xlarr", 10229, "\xe2\x9f\xb5"),
-               ENTITY_DEF("percnt", 37, "\x25"),
-               ENTITY_DEF("rharu", 8640, "\xe2\x87\x80"),
-               ENTITY_DEF("plusdo", 8724, "\xe2\x88\x94"),
-               ENTITY_DEF("TScy", 1062, "\xd0\xa6"),
-               ENTITY_DEF("kcy", 1082, "\xd0\xba"),
-               ENTITY_DEF("boxVR", 9568, "\xe2\x95\xa0"),
-               ENTITY_DEF("looparrowleft", 8619, "\xe2\x86\xab"),
-               ENTITY_DEF("scirc", 349, "\xc5\x9d"),
-               ENTITY_DEF("drcorn", 8991, "\xe2\x8c\x9f"),
-               ENTITY_DEF("iiota", 8489, "\xe2\x84\xa9"),
-               ENTITY_DEF("Zcy", 1047, "\xd0\x97"),
-               ENTITY_DEF("frac58", 8541, "\xe2\x85\x9d"),
-               ENTITY_DEF("alpha", 945, "\xce\xb1"),
-               ENTITY_DEF("daleth", 8504, "\xe2\x84\xb8"),
-               ENTITY_DEF("gtreqless", 8923, "\xe2\x8b\x9b"),
-               ENTITY_DEF("tstrok", 359, "\xc5\xa7"),
-               ENTITY_DEF("plusb", 8862, "\xe2\x8a\x9e"),
-               ENTITY_DEF("odsold", 10684, "\xe2\xa6\xbc"),
-               ENTITY_DEF("varsupsetneqq", 10956, "\xe2\xab\x8c\xef\xb8\x80"),
-               ENTITY_DEF("otilde", 245, "\xc3\xb5"),
-               ENTITY_DEF("gtcir", 10874, "\xe2\xa9\xba"),
-               ENTITY_DEF("lltri", 9722, "\xe2\x97\xba"),
-               ENTITY_DEF("rx", 8478, "\xe2\x84\x9e"),
-               ENTITY_DEF("ljcy", 1113, "\xd1\x99"),
-               ENTITY_DEF("parsim", 10995, "\xe2\xab\xb3"),
-               ENTITY_DEF("NotElement", 8713, "\xe2\x88\x89"),
-               ENTITY_DEF("plusmn", 177, "\xc2\xb1"),
-               ENTITY_DEF("varsubsetneq", 8842, "\xe2\x8a\x8a\xef\xb8\x80"),
-               ENTITY_DEF("subset", 8834, "\xe2\x8a\x82"),
-               ENTITY_DEF("awint", 10769, "\xe2\xa8\x91"),
-               ENTITY_DEF("laemptyv", 10676, "\xe2\xa6\xb4"),
-               ENTITY_DEF("phiv", 981, "\xcf\x95"),
-               ENTITY_DEF("sfrown", 8994, "\xe2\x8c\xa2"),
-               ENTITY_DEF("DoubleUpDownArrow", 8661, "\xe2\x87\x95"),
-               ENTITY_DEF("lpar", 40, "\x28"),
-               ENTITY_DEF("frac45", 8536, "\xe2\x85\x98"),
-               ENTITY_DEF("rBarr", 10511, "\xe2\xa4\x8f"),
-               ENTITY_DEF("npolint", 10772, "\xe2\xa8\x94"),
-               ENTITY_DEF("emacr", 275, "\xc4\x93"),
-               ENTITY_DEF("maltese", 10016, "\xe2\x9c\xa0"),
-               ENTITY_DEF("PlusMinus", 177, "\xc2\xb1"),
-               ENTITY_DEF("ReverseEquilibrium", 8651, "\xe2\x87\x8b"),
-               ENTITY_DEF("oscr", 8500, "\xe2\x84\xb4"),
-               ENTITY_DEF("blacksquare", 9642, "\xe2\x96\xaa"),
-               ENTITY_DEF("TSHcy", 1035, "\xd0\x8b"),
-               ENTITY_DEF("gap", 10886, "\xe2\xaa\x86"),
-               ENTITY_DEF("xnis", 8955, "\xe2\x8b\xbb"),
-               ENTITY_DEF("Ll", 8920, "\xe2\x8b\x98"),
-               ENTITY_DEF("PrecedesEqual", 10927, "\xe2\xaa\xaf"),
-               ENTITY_DEF("incare", 8453, "\xe2\x84\x85"),
-               ENTITY_DEF("nharr", 8622, "\xe2\x86\xae"),
-               ENTITY_DEF("varnothing", 8709, "\xe2\x88\x85"),
-               ENTITY_DEF("ShortDownArrow", 8595, "\xe2\x86\x93"),
-               ENTITY_DEF("nbsp", 160, " "),
-               ENTITY_DEF("asympeq", 8781, "\xe2\x89\x8d"),
-               ENTITY_DEF("rbrkslu", 10640, "\xe2\xa6\x90"),
-               ENTITY_DEF("rho", 961, "\xcf\x81"),
-               ENTITY_DEF("Mscr", 8499, "\xe2\x84\xb3"),
-               ENTITY_DEF("eth", 240, "\xc3\xb0"),
-               ENTITY_DEF("suplarr", 10619, "\xe2\xa5\xbb"),
-               ENTITY_DEF("Tab", 9, "\x09"),
-               ENTITY_DEF("omicron", 959, "\xce\xbf"),
-               ENTITY_DEF("blacktriangle", 9652, "\xe2\x96\xb4"),
-               ENTITY_DEF("nldr", 8229, "\xe2\x80\xa5"),
-               ENTITY_DEF("downharpoonleft", 8643, "\xe2\x87\x83"),
-               ENTITY_DEF("circledcirc", 8858, "\xe2\x8a\x9a"),
-               ENTITY_DEF("leftleftarrows", 8647, "\xe2\x87\x87"),
-               ENTITY_DEF("NotHumpDownHump", 8782, "\xe2\x89\x8e\xcc\xb8"),
-               ENTITY_DEF("nvgt", 62, "\x3e\xe2\x83\x92"),
-               ENTITY_DEF("rhard", 8641, "\xe2\x87\x81"),
-               ENTITY_DEF("nGg", 8921, "\xe2\x8b\x99\xcc\xb8"),
-               ENTITY_DEF("lurdshar", 10570, "\xe2\xa5\x8a"),
-               ENTITY_DEF("cirE", 10691, "\xe2\xa7\x83"),
-               ENTITY_DEF("isinE", 8953, "\xe2\x8b\xb9"),
-               ENTITY_DEF("eparsl", 10723, "\xe2\xa7\xa3"),
-               ENTITY_DEF("RightAngleBracket", 10217, "\xe2\x9f\xa9"),
-               ENTITY_DEF("hcirc", 293, "\xc4\xa5"),
-               ENTITY_DEF("bumpeq", 8783, "\xe2\x89\x8f"),
-               ENTITY_DEF("cire", 8791, "\xe2\x89\x97"),
-               ENTITY_DEF("dotplus", 8724, "\xe2\x88\x94"),
-               ENTITY_DEF("itilde", 297, "\xc4\xa9"),
-               ENTITY_DEF("uwangle", 10663, "\xe2\xa6\xa7"),
-               ENTITY_DEF("rlhar", 8652, "\xe2\x87\x8c"),
-               ENTITY_DEF("rbrace", 125, "\x7d"),
-               ENTITY_DEF("mid", 8739, "\xe2\x88\xa3"),
-               ENTITY_DEF("el", 10905, "\xe2\xaa\x99"),
-               ENTITY_DEF("KJcy", 1036, "\xd0\x8c"),
-               ENTITY_DEF("odiv", 10808, "\xe2\xa8\xb8"),
-               ENTITY_DEF("amacr", 257, "\xc4\x81"),
-               ENTITY_DEF("qprime", 8279, "\xe2\x81\x97"),
-               ENTITY_DEF("tcedil", 355, "\xc5\xa3"),
-               ENTITY_DEF("UpArrowDownArrow", 8645, "\xe2\x87\x85"),
-               ENTITY_DEF("spades", 9824, "\xe2\x99\xa0"),
-               ENTITY_DEF("napos", 329, "\xc5\x89"),
-               ENTITY_DEF("straightepsilon", 1013, "\xcf\xb5"),
-               ENTITY_DEF("CupCap", 8781, "\xe2\x89\x8d"),
-               ENTITY_DEF("Oopf", 120134, "\xf0\x9d\x95\x86"),
-               ENTITY_DEF("sub", 8834, "\xe2\x8a\x82"),
-               ENTITY_DEF("ohm", 937, "\xce\xa9"),
-               ENTITY_DEF("UnderBrace", 9183, "\xe2\x8f\x9f"),
-               ENTITY_DEF("looparrowright", 8620, "\xe2\x86\xac"),
-               ENTITY_DEF("xotime", 10754, "\xe2\xa8\x82"),
-               ENTITY_DEF("ntgl", 8825, "\xe2\x89\xb9"),
-               ENTITY_DEF("minusdu", 10794, "\xe2\xa8\xaa"),
-               ENTITY_DEF("rarrb", 8677, "\xe2\x87\xa5"),
-               ENTITY_DEF("nvlArr", 10498, "\xe2\xa4\x82"),
-               ENTITY_DEF("triangle", 9653, "\xe2\x96\xb5"),
-               ENTITY_DEF("nacute", 324, "\xc5\x84"),
-               ENTITY_DEF("boxHD", 9574, "\xe2\x95\xa6"),
-               ENTITY_DEF("ratio", 8758, "\xe2\x88\xb6"),
-               ENTITY_DEF("larrsim", 10611, "\xe2\xa5\xb3"),
-               ENTITY_DEF("LessLess", 10913, "\xe2\xaa\xa1"),
-               ENTITY_DEF("yacy", 1103, "\xd1\x8f"),
-               ENTITY_DEF("ctdot", 8943, "\xe2\x8b\xaf"),
-               ENTITY_DEF("and", 8743, "\xe2\x88\xa7"),
-               ENTITY_DEF("lrtri", 8895, "\xe2\x8a\xbf"),
-               ENTITY_DEF("eDot", 8785, "\xe2\x89\x91"),
-               ENTITY_DEF("sqsub", 8847, "\xe2\x8a\x8f"),
-               ENTITY_DEF("real", 8476, "\xe2\x84\x9c"),
-               ENTITY_DEF("Dcy", 1044, "\xd0\x94"),
-               ENTITY_DEF("vartheta", 977, "\xcf\x91"),
-               ENTITY_DEF("nsub", 8836, "\xe2\x8a\x84"),
-               ENTITY_DEF("DownTee", 8868, "\xe2\x8a\xa4"),
-               ENTITY_DEF("acute", 180, "\xc2\xb4"),
-               ENTITY_DEF("GreaterLess", 8823, "\xe2\x89\xb7"),
-               ENTITY_DEF("supplus", 10944, "\xe2\xab\x80"),
-               ENTITY_DEF("Vbar", 10987, "\xe2\xab\xab"),
-               ENTITY_DEF("divideontimes", 8903, "\xe2\x8b\x87"),
-               ENTITY_DEF("lsim", 8818, "\xe2\x89\xb2"),
-               ENTITY_DEF("nearhk", 10532, "\xe2\xa4\xa4"),
-               ENTITY_DEF("nLtv", 8810, "\xe2\x89\xaa\xcc\xb8"),
-               ENTITY_DEF("RuleDelayed", 10740, "\xe2\xa7\xb4"),
-               ENTITY_DEF("smile", 8995, "\xe2\x8c\xa3"),
-               ENTITY_DEF("coprod", 8720, "\xe2\x88\x90"),
-               ENTITY_DEF("imof", 8887, "\xe2\x8a\xb7"),
-               ENTITY_DEF("ecy", 1101, "\xd1\x8d"),
-               ENTITY_DEF("RightCeiling", 8969, "\xe2\x8c\x89"),
-               ENTITY_DEF("dlcorn", 8990, "\xe2\x8c\x9e"),
-               ENTITY_DEF("Nu", 925, "\xce\x9d"),
-               ENTITY_DEF("frac18", 8539, "\xe2\x85\x9b"),
-               ENTITY_DEF("diamond", 8900, "\xe2\x8b\x84"),
-               ENTITY_DEF("Icirc", 206, "\xc3\x8e"),
-               ENTITY_DEF("ngeq", 8817, "\xe2\x89\xb1"),
-               ENTITY_DEF("epsilon", 949, "\xce\xb5"),
-               ENTITY_DEF("fork", 8916, "\xe2\x8b\x94"),
-               ENTITY_DEF("xrarr", 10230, "\xe2\x9f\xb6"),
-               ENTITY_DEF("racute", 341, "\xc5\x95"),
-               ENTITY_DEF("ntlg", 8824, "\xe2\x89\xb8"),
-               ENTITY_DEF("xvee", 8897, "\xe2\x8b\x81"),
-               ENTITY_DEF("LeftArrowRightArrow", 8646, "\xe2\x87\x86"),
-               ENTITY_DEF("DownLeftRightVector", 10576, "\xe2\xa5\x90"),
-               ENTITY_DEF("Eacute", 201, "\xc3\x89"),
-               ENTITY_DEF("gimel", 8503, "\xe2\x84\xb7"),
-               ENTITY_DEF("rtimes", 8906, "\xe2\x8b\x8a"),
-               ENTITY_DEF("forall", 8704, "\xe2\x88\x80"),
-               ENTITY_DEF("DiacriticalDoubleAcute", 733, "\xcb\x9d"),
-               ENTITY_DEF("dArr", 8659, "\xe2\x87\x93"),
-               ENTITY_DEF("fallingdotseq", 8786, "\xe2\x89\x92"),
-               ENTITY_DEF("Aogon", 260, "\xc4\x84"),
-               ENTITY_DEF("PartialD", 8706, "\xe2\x88\x82"),
-               ENTITY_DEF("mapstoup", 8613, "\xe2\x86\xa5"),
-               ENTITY_DEF("die", 168, "\xc2\xa8"),
-               ENTITY_DEF("ngt", 8815, "\xe2\x89\xaf"),
-               ENTITY_DEF("vcy", 1074, "\xd0\xb2"),
-               ENTITY_DEF("fjlig", 0, "\x66\x6a"),
-               ENTITY_DEF("submult", 10945, "\xe2\xab\x81"),
-               ENTITY_DEF("ubrcy", 1118, "\xd1\x9e"),
-               ENTITY_DEF("ovbar", 9021, "\xe2\x8c\xbd"),
-               ENTITY_DEF("bsime", 8909, "\xe2\x8b\x8d"),
-               ENTITY_DEF("precnsim", 8936, "\xe2\x8b\xa8"),
-               ENTITY_DEF("DiacriticalTilde", 732, "\xcb\x9c"),
-               ENTITY_DEF("cwint", 8753, "\xe2\x88\xb1"),
-               ENTITY_DEF("Scy", 1057, "\xd0\xa1"),
-               ENTITY_DEF("NotGreaterEqual", 8817, "\xe2\x89\xb1"),
-               ENTITY_DEF("boxUR", 9562, "\xe2\x95\x9a"),
-               ENTITY_DEF("LessSlantEqual", 10877, "\xe2\xa9\xbd"),
-               ENTITY_DEF("Barwed", 8966, "\xe2\x8c\x86"),
-               ENTITY_DEF("supdot", 10942, "\xe2\xaa\xbe"),
-               ENTITY_DEF("gel", 8923, "\xe2\x8b\x9b"),
-               ENTITY_DEF("iscr", 119998, "\xf0\x9d\x92\xbe"),
-               ENTITY_DEF("doublebarwedge", 8966, "\xe2\x8c\x86"),
-               ENTITY_DEF("Idot", 304, "\xc4\xb0"),
-               ENTITY_DEF("DoubleDot", 168, "\xc2\xa8"),
-               ENTITY_DEF("rsquo", 8217, "\xe2\x80\x99"),
-               ENTITY_DEF("subsetneqq", 10955, "\xe2\xab\x8b"),
-               ENTITY_DEF("UpEquilibrium", 10606, "\xe2\xa5\xae"),
-               ENTITY_DEF("copysr", 8471, "\xe2\x84\x97"),
-               ENTITY_DEF("RightDoubleBracket", 10215, "\xe2\x9f\xa7"),
-               ENTITY_DEF("LeftRightVector", 10574, "\xe2\xa5\x8e"),
-               ENTITY_DEF("DownLeftVectorBar", 10582, "\xe2\xa5\x96"),
-               ENTITY_DEF("suphsub", 10967, "\xe2\xab\x97"),
-               ENTITY_DEF("cedil", 184, "\xc2\xb8"),
-               ENTITY_DEF("prurel", 8880, "\xe2\x8a\xb0"),
-               ENTITY_DEF("imagpart", 8465, "\xe2\x84\x91"),
-               ENTITY_DEF("Hscr", 8459, "\xe2\x84\x8b"),
-               ENTITY_DEF("jmath", 567, "\xc8\xb7"),
-               ENTITY_DEF("nrtrie", 8941, "\xe2\x8b\xad"),
-               ENTITY_DEF("nsup", 8837, "\xe2\x8a\x85"),
-               ENTITY_DEF("Ubrcy", 1038, "\xd0\x8e"),
-               ENTITY_DEF("succnsim", 8937, "\xe2\x8b\xa9"),
-               ENTITY_DEF("nesim", 8770, "\xe2\x89\x82\xcc\xb8"),
-               ENTITY_DEF("varepsilon", 1013, "\xcf\xb5"),
-               ENTITY_DEF("DoubleRightTee", 8872, "\xe2\x8a\xa8"),
-               ENTITY_DEF("not", 172, "\xc2\xac"),
-               ENTITY_DEF("lesdot", 10879, "\xe2\xa9\xbf"),
-               ENTITY_DEF("backepsilon", 1014, "\xcf\xb6"),
-               ENTITY_DEF("srarr", 8594, "\xe2\x86\x92"),
-               ENTITY_DEF("varsubsetneqq", 10955, "\xe2\xab\x8b\xef\xb8\x80"),
-               ENTITY_DEF("sqcap", 8851, "\xe2\x8a\x93"),
-               ENTITY_DEF("rightleftarrows", 8644, "\xe2\x87\x84"),
-               ENTITY_DEF("diams", 9830, "\xe2\x99\xa6"),
-               ENTITY_DEF("boxdR", 9554, "\xe2\x95\x92"),
-               ENTITY_DEF("ngeqslant", 10878, "\xe2\xa9\xbe\xcc\xb8"),
-               ENTITY_DEF("boxDR", 9556, "\xe2\x95\x94"),
-               ENTITY_DEF("sext", 10038, "\xe2\x9c\xb6"),
-               ENTITY_DEF("backsim", 8765, "\xe2\x88\xbd"),
-               ENTITY_DEF("nfr", 120107, "\xf0\x9d\x94\xab"),
-               ENTITY_DEF("CloseCurlyDoubleQuote", 8221, "\xe2\x80\x9d"),
-               ENTITY_DEF("npart", 8706, "\xe2\x88\x82\xcc\xb8"),
-               ENTITY_DEF("dharl", 8643, "\xe2\x87\x83"),
-               ENTITY_DEF("NewLine", 10, "\x0a"),
-               ENTITY_DEF("bigotimes", 10754, "\xe2\xa8\x82"),
-               ENTITY_DEF("lAtail", 10523, "\xe2\xa4\x9b"),
-               ENTITY_DEF("frac14", 188, "\xc2\xbc"),
-               ENTITY_DEF("or", 8744, "\xe2\x88\xa8"),
-               ENTITY_DEF("subedot", 10947, "\xe2\xab\x83"),
-               ENTITY_DEF("nmid", 8740, "\xe2\x88\xa4"),
-               ENTITY_DEF("DownArrowUpArrow", 8693, "\xe2\x87\xb5"),
-               ENTITY_DEF("icy", 1080, "\xd0\xb8"),
-               ENTITY_DEF("num", 35, "\x23"),
-               ENTITY_DEF("Gdot", 288, "\xc4\xa0"),
-               ENTITY_DEF("urcrop", 8974, "\xe2\x8c\x8e"),
-               ENTITY_DEF("epsiv", 1013, "\xcf\xb5"),
-               ENTITY_DEF("topcir", 10993, "\xe2\xab\xb1"),
-               ENTITY_DEF("ne", 8800, "\xe2\x89\xa0"),
-               ENTITY_DEF("osol", 8856, "\xe2\x8a\x98"),
-               ENTITY_DEF("amp", 38, "\x26"),
-               ENTITY_DEF("ncap", 10819, "\xe2\xa9\x83"),
-               ENTITY_DEF("Sscr", 119982, "\xf0\x9d\x92\xae"),
-               ENTITY_DEF("sung", 9834, "\xe2\x99\xaa"),
-               ENTITY_DEF("ltri", 9667, "\xe2\x97\x83"),
-               ENTITY_DEF("frac25", 8534, "\xe2\x85\x96"),
-               ENTITY_DEF("DZcy", 1039, "\xd0\x8f"),
-               ENTITY_DEF("RightUpVector", 8638, "\xe2\x86\xbe"),
-               ENTITY_DEF("rsquor", 8217, "\xe2\x80\x99"),
-               ENTITY_DEF("uplus", 8846, "\xe2\x8a\x8e"),
-               ENTITY_DEF("triangleright", 9657, "\xe2\x96\xb9"),
-               ENTITY_DEF("lAarr", 8666, "\xe2\x87\x9a"),
-               ENTITY_DEF("HilbertSpace", 8459, "\xe2\x84\x8b"),
-               ENTITY_DEF("there4", 8756, "\xe2\x88\xb4"),
-               ENTITY_DEF("vscr", 120011, "\xf0\x9d\x93\x8b"),
-               ENTITY_DEF("cirscir", 10690, "\xe2\xa7\x82"),
-               ENTITY_DEF("roarr", 8702, "\xe2\x87\xbe"),
-               ENTITY_DEF("hslash", 8463, "\xe2\x84\x8f"),
-               ENTITY_DEF("supdsub", 10968, "\xe2\xab\x98"),
-               ENTITY_DEF("simg", 10910, "\xe2\xaa\x9e"),
-               ENTITY_DEF("trade", 8482, "\xe2\x84\xa2"),
-               ENTITY_DEF("searrow", 8600, "\xe2\x86\x98"),
-               ENTITY_DEF("DownLeftVector", 8637, "\xe2\x86\xbd"),
-               ENTITY_DEF("FilledSmallSquare", 9724, "\xe2\x97\xbc"),
-               ENTITY_DEF("prod", 8719, "\xe2\x88\x8f"),
-               ENTITY_DEF("oror", 10838, "\xe2\xa9\x96"),
-               ENTITY_DEF("udarr", 8645, "\xe2\x87\x85"),
-               ENTITY_DEF("jsercy", 1112, "\xd1\x98"),
-               ENTITY_DEF("tprime", 8244, "\xe2\x80\xb4"),
-               ENTITY_DEF("bprime", 8245, "\xe2\x80\xb5"),
-               ENTITY_DEF("malt", 10016, "\xe2\x9c\xa0"),
-               ENTITY_DEF("bigcup", 8899, "\xe2\x8b\x83"),
-               ENTITY_DEF("oint", 8750, "\xe2\x88\xae"),
-               ENTITY_DEF("female", 9792, "\xe2\x99\x80"),
-               ENTITY_DEF("omacr", 333, "\xc5\x8d"),
-               ENTITY_DEF("SquareSubsetEqual", 8849, "\xe2\x8a\x91"),
-               ENTITY_DEF("SucceedsEqual", 10928, "\xe2\xaa\xb0"),
-               ENTITY_DEF("plusacir", 10787, "\xe2\xa8\xa3"),
-               ENTITY_DEF("Gcirc", 284, "\xc4\x9c"),
-               ENTITY_DEF("lesdotor", 10883, "\xe2\xaa\x83"),
-               ENTITY_DEF("escr", 8495, "\xe2\x84\xaf"),
-               ENTITY_DEF("THORN", 222, "\xc3\x9e"),
-               ENTITY_DEF("UpArrowBar", 10514, "\xe2\xa4\x92"),
-               ENTITY_DEF("nvrtrie", 8885, "\xe2\x8a\xb5\xe2\x83\x92"),
-               ENTITY_DEF("varkappa", 1008, "\xcf\xb0"),
-               ENTITY_DEF("NotReverseElement", 8716, "\xe2\x88\x8c"),
-               ENTITY_DEF("zdot", 380, "\xc5\xbc"),
-               ENTITY_DEF("ExponentialE", 8519, "\xe2\x85\x87"),
-               ENTITY_DEF("lesseqgtr", 8922, "\xe2\x8b\x9a"),
-               ENTITY_DEF("cscr", 119992, "\xf0\x9d\x92\xb8"),
-               ENTITY_DEF("Dscr", 119967, "\xf0\x9d\x92\x9f"),
-               ENTITY_DEF("lthree", 8907, "\xe2\x8b\x8b"),
-               ENTITY_DEF("Ccedil", 199, "\xc3\x87"),
-               ENTITY_DEF("nge", 8817, "\xe2\x89\xb1"),
-               ENTITY_DEF("UpperLeftArrow", 8598, "\xe2\x86\x96"),
-               ENTITY_DEF("vDash", 8872, "\xe2\x8a\xa8"),
-               ENTITY_DEF("efDot", 8786, "\xe2\x89\x92"),
-               ENTITY_DEF("telrec", 8981, "\xe2\x8c\x95"),
-               ENTITY_DEF("vellip", 8942, "\xe2\x8b\xae"),
-               ENTITY_DEF("nrArr", 8655, "\xe2\x87\x8f"),
-               ENTITY_DEF("ugrave", 249, "\xc3\xb9"),
-               ENTITY_DEF("uring", 367, "\xc5\xaf"),
-               ENTITY_DEF("Bernoullis", 8492, "\xe2\x84\xac"),
-               ENTITY_DEF("nles", 10877, "\xe2\xa9\xbd\xcc\xb8"),
-               ENTITY_DEF("macr", 175, "\xc2\xaf"),
-               ENTITY_DEF("boxuR", 9560, "\xe2\x95\x98"),
-               ENTITY_DEF("clubsuit", 9827, "\xe2\x99\xa3"),
-               ENTITY_DEF("rightarrowtail", 8611, "\xe2\x86\xa3"),
-               ENTITY_DEF("epar", 8917, "\xe2\x8b\x95"),
-               ENTITY_DEF("ltcc", 10918, "\xe2\xaa\xa6"),
-               ENTITY_DEF("twoheadleftarrow", 8606, "\xe2\x86\x9e"),
-               ENTITY_DEF("aleph", 8501, "\xe2\x84\xb5"),
-               ENTITY_DEF("Colon", 8759, "\xe2\x88\xb7"),
-               ENTITY_DEF("vltri", 8882, "\xe2\x8a\xb2"),
-               ENTITY_DEF("quaternions", 8461, "\xe2\x84\x8d"),
-               ENTITY_DEF("rfr", 120111, "\xf0\x9d\x94\xaf"),
-               ENTITY_DEF("Ouml", 214, "\xc3\x96"),
-               ENTITY_DEF("rsh", 8625, "\xe2\x86\xb1"),
-               ENTITY_DEF("emptyv", 8709, "\xe2\x88\x85"),
-               ENTITY_DEF("sqsup", 8848, "\xe2\x8a\x90"),
-               ENTITY_DEF("marker", 9646, "\xe2\x96\xae"),
-               ENTITY_DEF("Efr", 120072, "\xf0\x9d\x94\x88"),
-               ENTITY_DEF("DotEqual", 8784, "\xe2\x89\x90"),
-               ENTITY_DEF("eqsim", 8770, "\xe2\x89\x82"),
-               ENTITY_DEF("NotSucceedsEqual", 10928, "\xe2\xaa\xb0\xcc\xb8"),
-               ENTITY_DEF("primes", 8473, "\xe2\x84\x99"),
-               ENTITY_DEF("times", 215, "\xc3\x97"),
-               ENTITY_DEF("rangd", 10642, "\xe2\xa6\x92"),
-               ENTITY_DEF("rightharpoonup", 8640, "\xe2\x87\x80"),
-               ENTITY_DEF("lrhard", 10605, "\xe2\xa5\xad"),
-               ENTITY_DEF("ape", 8778, "\xe2\x89\x8a"),
-               ENTITY_DEF("varsupsetneq", 8843, "\xe2\x8a\x8b\xef\xb8\x80"),
-               ENTITY_DEF("larrlp", 8619, "\xe2\x86\xab"),
-               ENTITY_DEF("NotPrecedesEqual", 10927, "\xe2\xaa\xaf\xcc\xb8"),
-               ENTITY_DEF("ulcorner", 8988, "\xe2\x8c\x9c"),
-               ENTITY_DEF("acd", 8767, "\xe2\x88\xbf"),
-               ENTITY_DEF("Hacek", 711, "\xcb\x87"),
-               ENTITY_DEF("xuplus", 10756, "\xe2\xa8\x84"),
-               ENTITY_DEF("therefore", 8756, "\xe2\x88\xb4"),
-               ENTITY_DEF("YIcy", 1031, "\xd0\x87"),
-               ENTITY_DEF("Tfr", 120087, "\xf0\x9d\x94\x97"),
-               ENTITY_DEF("Jcirc", 308, "\xc4\xb4"),
-               ENTITY_DEF("LessGreater", 8822, "\xe2\x89\xb6"),
-               ENTITY_DEF("Uring", 366, "\xc5\xae"),
-               ENTITY_DEF("Ugrave", 217, "\xc3\x99"),
-               ENTITY_DEF("rarr", 8594, "\xe2\x86\x92"),
-               ENTITY_DEF("wopf", 120168, "\xf0\x9d\x95\xa8"),
-               ENTITY_DEF("imath", 305, "\xc4\xb1"),
-               ENTITY_DEF("Yopf", 120144, "\xf0\x9d\x95\x90"),
-               ENTITY_DEF("colone", 8788, "\xe2\x89\x94"),
-               ENTITY_DEF("csube", 10961, "\xe2\xab\x91"),
-               ENTITY_DEF("odash", 8861, "\xe2\x8a\x9d"),
-               ENTITY_DEF("olarr", 8634, "\xe2\x86\xba"),
-               ENTITY_DEF("angrt", 8735, "\xe2\x88\x9f"),
-               ENTITY_DEF("NotLeftTriangleBar", 10703, "\xe2\xa7\x8f\xcc\xb8"),
-               ENTITY_DEF("GreaterEqual", 8805, "\xe2\x89\xa5"),
-               ENTITY_DEF("scnap", 10938, "\xe2\xaa\xba"),
-               ENTITY_DEF("pi", 960, "\xcf\x80"),
-               ENTITY_DEF("lesg", 8922, "\xe2\x8b\x9a\xef\xb8\x80"),
-               ENTITY_DEF("orderof", 8500, "\xe2\x84\xb4"),
-               ENTITY_DEF("uacute", 250, "\xc3\xba"),
-               ENTITY_DEF("Barv", 10983, "\xe2\xab\xa7"),
-               ENTITY_DEF("Theta", 920, "\xce\x98"),
-               ENTITY_DEF("leftrightsquigarrow", 8621, "\xe2\x86\xad"),
-               ENTITY_DEF("Atilde", 195, "\xc3\x83"),
-               ENTITY_DEF("cupdot", 8845, "\xe2\x8a\x8d"),
-               ENTITY_DEF("ntriangleright", 8939, "\xe2\x8b\xab"),
-               ENTITY_DEF("measuredangle", 8737, "\xe2\x88\xa1"),
-               ENTITY_DEF("jscr", 119999, "\xf0\x9d\x92\xbf"),
-               ENTITY_DEF("inodot", 305, "\xc4\xb1"),
-               ENTITY_DEF("mopf", 120158, "\xf0\x9d\x95\x9e"),
-               ENTITY_DEF("hkswarow", 10534, "\xe2\xa4\xa6"),
-               ENTITY_DEF("lopar", 10629, "\xe2\xa6\x85"),
-               ENTITY_DEF("thksim", 8764, "\xe2\x88\xbc"),
-               ENTITY_DEF("bkarow", 10509, "\xe2\xa4\x8d"),
-               ENTITY_DEF("rarrfs", 10526, "\xe2\xa4\x9e"),
-               ENTITY_DEF("ntrianglelefteq", 8940, "\xe2\x8b\xac"),
-               ENTITY_DEF("Bscr", 8492, "\xe2\x84\xac"),
-               ENTITY_DEF("topf", 120165, "\xf0\x9d\x95\xa5"),
-               ENTITY_DEF("Uacute", 218, "\xc3\x9a"),
-               ENTITY_DEF("lap", 10885, "\xe2\xaa\x85"),
-               ENTITY_DEF("djcy", 1106, "\xd1\x92"),
-               ENTITY_DEF("bopf", 120147, "\xf0\x9d\x95\x93"),
-               ENTITY_DEF("empty", 8709, "\xe2\x88\x85"),
-               ENTITY_DEF("LeftAngleBracket", 10216, "\xe2\x9f\xa8"),
-               ENTITY_DEF("Imacr", 298, "\xc4\xaa"),
-               ENTITY_DEF("ltcir", 10873, "\xe2\xa9\xb9"),
-               ENTITY_DEF("trisb", 10701, "\xe2\xa7\x8d"),
-               ENTITY_DEF("gjcy", 1107, "\xd1\x93"),
-               ENTITY_DEF("pr", 8826, "\xe2\x89\xba"),
-               ENTITY_DEF("Mu", 924, "\xce\x9c"),
-               ENTITY_DEF("ogon", 731, "\xcb\x9b"),
-               ENTITY_DEF("pertenk", 8241, "\xe2\x80\xb1"),
-               ENTITY_DEF("plustwo", 10791, "\xe2\xa8\xa7"),
-               ENTITY_DEF("Vfr", 120089, "\xf0\x9d\x94\x99"),
-               ENTITY_DEF("ApplyFunction", 8289, "\xe2\x81\xa1"),
-               ENTITY_DEF("Sub", 8912, "\xe2\x8b\x90"),
-               ENTITY_DEF("DoubleLeftRightArrow", 8660, "\xe2\x87\x94"),
-               ENTITY_DEF("Lmidot", 319, "\xc4\xbf"),
-               ENTITY_DEF("nwarrow", 8598, "\xe2\x86\x96"),
-               ENTITY_DEF("angrtvbd", 10653, "\xe2\xa6\x9d"),
-               ENTITY_DEF("fcy", 1092, "\xd1\x84"),
-               ENTITY_DEF("ltlarr", 10614, "\xe2\xa5\xb6"),
-               ENTITY_DEF("CircleMinus", 8854, "\xe2\x8a\x96"),
-               ENTITY_DEF("angmsdab", 10665, "\xe2\xa6\xa9"),
-               ENTITY_DEF("wedgeq", 8793, "\xe2\x89\x99"),
-               ENTITY_DEF("iogon", 303, "\xc4\xaf"),
-               ENTITY_DEF("laquo", 171, "\xc2\xab"),
-               ENTITY_DEF("NestedGreaterGreater", 8811, "\xe2\x89\xab"),
-               ENTITY_DEF("UnionPlus", 8846, "\xe2\x8a\x8e"),
-               ENTITY_DEF("CircleDot", 8857, "\xe2\x8a\x99"),
-               ENTITY_DEF("coloneq", 8788, "\xe2\x89\x94"),
-               ENTITY_DEF("csupe", 10962, "\xe2\xab\x92"),
-               ENTITY_DEF("tcaron", 357, "\xc5\xa5"),
-               ENTITY_DEF("GreaterTilde", 8819, "\xe2\x89\xb3"),
-               ENTITY_DEF("Map", 10501, "\xe2\xa4\x85"),
-               ENTITY_DEF("DoubleLongLeftArrow", 10232, "\xe2\x9f\xb8"),
-               ENTITY_DEF("Uparrow", 8657, "\xe2\x87\x91"),
-               ENTITY_DEF("scy", 1089, "\xd1\x81"),
-               ENTITY_DEF("llarr", 8647, "\xe2\x87\x87"),
-               ENTITY_DEF("rangle", 10217, "\xe2\x9f\xa9"),
-               ENTITY_DEF("sstarf", 8902, "\xe2\x8b\x86"),
-               ENTITY_DEF("InvisibleTimes", 8290, "\xe2\x81\xa2"),
-               ENTITY_DEF("egsdot", 10904, "\xe2\xaa\x98"),
-               ENTITY_DEF("target", 8982, "\xe2\x8c\x96"),
-               ENTITY_DEF("lesges", 10899, "\xe2\xaa\x93"),
-               ENTITY_DEF("curren", 164, "\xc2\xa4"),
-               ENTITY_DEF("yopf", 120170, "\xf0\x9d\x95\xaa"),
-               ENTITY_DEF("frac23", 8532, "\xe2\x85\x94"),
-               ENTITY_DEF("NotSucceedsTilde", 8831, "\xe2\x89\xbf\xcc\xb8"),
-               ENTITY_DEF("napprox", 8777, "\xe2\x89\x89"),
-               ENTITY_DEF("odblac", 337, "\xc5\x91"),
-               ENTITY_DEF("gammad", 989, "\xcf\x9d"),
-               ENTITY_DEF("dscr", 119993, "\xf0\x9d\x92\xb9"),
-               ENTITY_DEF("SupersetEqual", 8839, "\xe2\x8a\x87"),
-               ENTITY_DEF("squf", 9642, "\xe2\x96\xaa"),
-               ENTITY_DEF("Because", 8757, "\xe2\x88\xb5"),
-               ENTITY_DEF("sccue", 8829, "\xe2\x89\xbd"),
-               ENTITY_DEF("KHcy", 1061, "\xd0\xa5"),
-               ENTITY_DEF("Wcirc", 372, "\xc5\xb4"),
-               ENTITY_DEF("uparrow", 8593, "\xe2\x86\x91"),
-               ENTITY_DEF("lessgtr", 8822, "\xe2\x89\xb6"),
-               ENTITY_DEF("thickapprox", 8776, "\xe2\x89\x88"),
-               ENTITY_DEF("lbrksld", 10639, "\xe2\xa6\x8f"),
-               ENTITY_DEF("oslash", 248, "\xc3\xb8"),
-               ENTITY_DEF("NotCupCap", 8813, "\xe2\x89\xad"),
-               ENTITY_DEF("elinters", 9191, "\xe2\x8f\xa7"),
-               ENTITY_DEF("Assign", 8788, "\xe2\x89\x94"),
-               ENTITY_DEF("ClockwiseContourIntegral", 8754, "\xe2\x88\xb2"),
-               ENTITY_DEF("lfisht", 10620, "\xe2\xa5\xbc"),
-               ENTITY_DEF("DownArrow", 8595, "\xe2\x86\x93"),
-               ENTITY_DEF("Zdot", 379, "\xc5\xbb"),
-               ENTITY_DEF("xscr", 120013, "\xf0\x9d\x93\x8d"),
-               ENTITY_DEF("DiacriticalGrave", 96, "\x60"),
-               ENTITY_DEF("DoubleLongLeftRightArrow", 10234, "\xe2\x9f\xba"),
-               ENTITY_DEF("angle", 8736, "\xe2\x88\xa0"),
-               ENTITY_DEF("race", 8765, "\xe2\x88\xbd\xcc\xb1"),
-               ENTITY_DEF("Ascr", 119964, "\xf0\x9d\x92\x9c"),
-               ENTITY_DEF("Xscr", 119987, "\xf0\x9d\x92\xb3"),
-               ENTITY_DEF("acirc", 226, "\xc3\xa2"),
-               ENTITY_DEF("otimesas", 10806, "\xe2\xa8\xb6"),
-               ENTITY_DEF("gscr", 8458, "\xe2\x84\x8a"),
-               ENTITY_DEF("gcy", 1075, "\xd0\xb3"),
-               ENTITY_DEF("angmsdag", 10670, "\xe2\xa6\xae"),
-               ENTITY_DEF("tshcy", 1115, "\xd1\x9b"),
-               ENTITY_DEF("Acy", 1040, "\xd0\x90"),
-               ENTITY_DEF("NotGreaterLess", 8825, "\xe2\x89\xb9"),
-               ENTITY_DEF("dtdot", 8945, "\xe2\x8b\xb1"),
-               ENTITY_DEF("quot", 34, "\x22"),
-               ENTITY_DEF("micro", 181, "\xc2\xb5"),
-               ENTITY_DEF("simplus", 10788, "\xe2\xa8\xa4"),
-               ENTITY_DEF("nsupseteq", 8841, "\xe2\x8a\x89"),
-               ENTITY_DEF("Ufr", 120088, "\xf0\x9d\x94\x98"),
-               ENTITY_DEF("Pr", 10939, "\xe2\xaa\xbb"),
-               ENTITY_DEF("napid", 8779, "\xe2\x89\x8b\xcc\xb8"),
-               ENTITY_DEF("rceil", 8969, "\xe2\x8c\x89"),
-               ENTITY_DEF("boxtimes", 8864, "\xe2\x8a\xa0"),
-               ENTITY_DEF("erarr", 10609, "\xe2\xa5\xb1"),
-               ENTITY_DEF("downdownarrows", 8650, "\xe2\x87\x8a"),
-               ENTITY_DEF("Kfr", 120078, "\xf0\x9d\x94\x8e"),
-               ENTITY_DEF("mho", 8487, "\xe2\x84\xa7"),
-               ENTITY_DEF("scpolint", 10771, "\xe2\xa8\x93"),
-               ENTITY_DEF("vArr", 8661, "\xe2\x87\x95"),
-               ENTITY_DEF("Ccaron", 268, "\xc4\x8c"),
-               ENTITY_DEF("NotRightTriangle", 8939, "\xe2\x8b\xab"),
-               ENTITY_DEF("topbot", 9014, "\xe2\x8c\xb6"),
-               ENTITY_DEF("qopf", 120162, "\xf0\x9d\x95\xa2"),
-               ENTITY_DEF("eogon", 281, "\xc4\x99"),
-               ENTITY_DEF("luruhar", 10598, "\xe2\xa5\xa6"),
-               ENTITY_DEF("gtdot", 8919, "\xe2\x8b\x97"),
-               ENTITY_DEF("Egrave", 200, "\xc3\x88"),
-               ENTITY_DEF("roplus", 10798, "\xe2\xa8\xae"),
-               ENTITY_DEF("Intersection", 8898, "\xe2\x8b\x82"),
-               ENTITY_DEF("Uarr", 8607, "\xe2\x86\x9f"),
-               ENTITY_DEF("dcy", 1076, "\xd0\xb4"),
-               ENTITY_DEF("boxvl", 9508, "\xe2\x94\xa4"),
-               ENTITY_DEF("RightArrowBar", 8677, "\xe2\x87\xa5"),
-               ENTITY_DEF("yuml", 255, "\xc3\xbf"),
-               ENTITY_DEF("parallel", 8741, "\xe2\x88\xa5"),
-               ENTITY_DEF("succneqq", 10934, "\xe2\xaa\xb6"),
-               ENTITY_DEF("bemptyv", 10672, "\xe2\xa6\xb0"),
-               ENTITY_DEF("starf", 9733, "\xe2\x98\x85"),
-               ENTITY_DEF("OverBar", 8254, "\xe2\x80\xbe"),
-               ENTITY_DEF("Alpha", 913, "\xce\x91"),
-               ENTITY_DEF("LeftUpVectorBar", 10584, "\xe2\xa5\x98"),
-               ENTITY_DEF("ufr", 120114, "\xf0\x9d\x94\xb2"),
-               ENTITY_DEF("swarhk", 10534, "\xe2\xa4\xa6"),
-               ENTITY_DEF("GreaterEqualLess", 8923, "\xe2\x8b\x9b"),
-               ENTITY_DEF("sscr", 120008, "\xf0\x9d\x93\x88"),
-               ENTITY_DEF("Pi", 928, "\xce\xa0"),
-               ENTITY_DEF("boxh", 9472, "\xe2\x94\x80"),
-               ENTITY_DEF("frac16", 8537, "\xe2\x85\x99"),
-               ENTITY_DEF("lbrack", 91, "\x5b"),
-               ENTITY_DEF("vert", 124, "\x7c"),
-               ENTITY_DEF("precneqq", 10933, "\xe2\xaa\xb5"),
-               ENTITY_DEF("NotGreaterSlantEqual", 10878, "\xe2\xa9\xbe\xcc\xb8"),
-               ENTITY_DEF("Omega", 937, "\xce\xa9"),
-               ENTITY_DEF("uarr", 8593, "\xe2\x86\x91"),
-               ENTITY_DEF("boxVr", 9567, "\xe2\x95\x9f"),
-               ENTITY_DEF("ruluhar", 10600, "\xe2\xa5\xa8"),
-               ENTITY_DEF("ShortLeftArrow", 8592, "\xe2\x86\x90"),
-               ENTITY_DEF("Qfr", 120084, "\xf0\x9d\x94\x94"),
-               ENTITY_DEF("olt", 10688, "\xe2\xa7\x80"),
-               ENTITY_DEF("nequiv", 8802, "\xe2\x89\xa2"),
-               ENTITY_DEF("fscr", 119995, "\xf0\x9d\x92\xbb"),
-               ENTITY_DEF("rarrhk", 8618, "\xe2\x86\xaa"),
-               ENTITY_DEF("nsqsupe", 8931, "\xe2\x8b\xa3"),
-               ENTITY_DEF("nsubseteq", 8840, "\xe2\x8a\x88"),
-               ENTITY_DEF("numero", 8470, "\xe2\x84\x96"),
-               ENTITY_DEF("emsp14", 8197, "\xe2\x80\x85"),
-               ENTITY_DEF("gl", 8823, "\xe2\x89\xb7"),
-               ENTITY_DEF("ocirc", 244, "\xc3\xb4"),
-               ENTITY_DEF("weierp", 8472, "\xe2\x84\x98"),
-               ENTITY_DEF("boxvL", 9569, "\xe2\x95\xa1"),
-               ENTITY_DEF("RightArrowLeftArrow", 8644, "\xe2\x87\x84"),
-               ENTITY_DEF("Precedes", 8826, "\xe2\x89\xba"),
-               ENTITY_DEF("RightVector", 8640, "\xe2\x87\x80"),
-               ENTITY_DEF("xcup", 8899, "\xe2\x8b\x83"),
-               ENTITY_DEF("angmsdad", 10667, "\xe2\xa6\xab"),
-               ENTITY_DEF("gtrsim", 8819, "\xe2\x89\xb3"),
-               ENTITY_DEF("natural", 9838, "\xe2\x99\xae"),
-               ENTITY_DEF("nVdash", 8878, "\xe2\x8a\xae"),
-               ENTITY_DEF("RightTriangleEqual", 8885, "\xe2\x8a\xb5"),
-               ENTITY_DEF("dscy", 1109, "\xd1\x95"),
-               ENTITY_DEF("leftthreetimes", 8907, "\xe2\x8b\x8b"),
-               ENTITY_DEF("prsim", 8830, "\xe2\x89\xbe"),
-               ENTITY_DEF("Bcy", 1041, "\xd0\x91"),
-               ENTITY_DEF("Chi", 935, "\xce\xa7"),
-               ENTITY_DEF("timesb", 8864, "\xe2\x8a\xa0"),
-               ENTITY_DEF("Del", 8711, "\xe2\x88\x87"),
-               ENTITY_DEF("lmidot", 320, "\xc5\x80"),
-               ENTITY_DEF("RightDownVector", 8642, "\xe2\x87\x82"),
-               ENTITY_DEF("simdot", 10858, "\xe2\xa9\xaa"),
-               ENTITY_DEF("FilledVerySmallSquare", 9642, "\xe2\x96\xaa"),
-               ENTITY_DEF("NotLessSlantEqual", 10877, "\xe2\xa9\xbd\xcc\xb8"),
-               ENTITY_DEF("SucceedsTilde", 8831, "\xe2\x89\xbf"),
-               ENTITY_DEF("duarr", 8693, "\xe2\x87\xb5"),
-               ENTITY_DEF("apE", 10864, "\xe2\xa9\xb0"),
-               ENTITY_DEF("odot", 8857, "\xe2\x8a\x99"),
-               ENTITY_DEF("mldr", 8230, "\xe2\x80\xa6"),
-               ENTITY_DEF("Uarrocir", 10569, "\xe2\xa5\x89"),
-               ENTITY_DEF("nLl", 8920, "\xe2\x8b\x98\xcc\xb8"),
-               ENTITY_DEF("rarrpl", 10565, "\xe2\xa5\x85"),
-               ENTITY_DEF("cir", 9675, "\xe2\x97\x8b"),
-               ENTITY_DEF("blk14", 9617, "\xe2\x96\x91"),
-               ENTITY_DEF("VerticalLine", 124, "\x7c"),
-               ENTITY_DEF("jcy", 1081, "\xd0\xb9"),
-               ENTITY_DEF("filig", 64257, "\xef\xac\x81"),
-               ENTITY_DEF("LongRightArrow", 10230, "\xe2\x9f\xb6"),
-               ENTITY_DEF("beta", 946, "\xce\xb2"),
-               ENTITY_DEF("ccupssm", 10832, "\xe2\xa9\x90"),
-               ENTITY_DEF("supsub", 10964, "\xe2\xab\x94"),
-               ENTITY_DEF("spar", 8741, "\xe2\x88\xa5"),
-               ENTITY_DEF("Tstrok", 358, "\xc5\xa6"),
-               ENTITY_DEF("isinv", 8712, "\xe2\x88\x88"),
-               ENTITY_DEF("rightsquigarrow", 8605, "\xe2\x86\x9d"),
-               ENTITY_DEF("Diamond", 8900, "\xe2\x8b\x84"),
-               ENTITY_DEF("curlyeqsucc", 8927, "\xe2\x8b\x9f"),
-               ENTITY_DEF("ijlig", 307, "\xc4\xb3"),
-               ENTITY_DEF("puncsp", 8200, "\xe2\x80\x88"),
-               ENTITY_DEF("hamilt", 8459, "\xe2\x84\x8b"),
-               ENTITY_DEF("mapstoleft", 8612, "\xe2\x86\xa4"),
-               ENTITY_DEF("Copf", 8450, "\xe2\x84\x82"),
-               ENTITY_DEF("prnsim", 8936, "\xe2\x8b\xa8"),
-               ENTITY_DEF("DotDot", 8412, "\xe2\x83\x9c"),
-               ENTITY_DEF("lobrk", 10214, "\xe2\x9f\xa6"),
-               ENTITY_DEF("twoheadrightarrow", 8608, "\xe2\x86\xa0"),
-               ENTITY_DEF("ngE", 8807, "\xe2\x89\xa7\xcc\xb8"),
-               ENTITY_DEF("cylcty", 9005, "\xe2\x8c\xad"),
-               ENTITY_DEF("sube", 8838, "\xe2\x8a\x86"),
-               ENTITY_DEF("NotEqualTilde", 8770, "\xe2\x89\x82\xcc\xb8"),
-               ENTITY_DEF("Yuml", 376, "\xc5\xb8"),
-               ENTITY_DEF("comp", 8705, "\xe2\x88\x81"),
-               ENTITY_DEF("dotminus", 8760, "\xe2\x88\xb8"),
-               ENTITY_DEF("crarr", 8629, "\xe2\x86\xb5"),
-               ENTITY_DEF("imped", 437, "\xc6\xb5"),
-               ENTITY_DEF("barwedge", 8965, "\xe2\x8c\x85"),
-               ENTITY_DEF("harrcir", 10568, "\xe2\xa5\x88")
-);
-
-class html_entities_storage { // Two lookup tables built once from html_entities_array: by entity name and by numeric code.
-       robin_hood::unordered_flat_map<std::string_view, html_entity_def> entity_by_name; // name -> copy of the table entry
-       robin_hood::unordered_flat_map<unsigned, html_entity_def> entity_by_id; // code -> copy of the table entry
-public:
-       html_entities_storage() { // Fills both maps from html_entities_array.
-               entity_by_name.reserve(html_entities_array.size()); // pre-size to the table length
-               entity_by_id.reserve(html_entities_array.size());
-
-               for (const auto &e : html_entities_array) {
-                       entity_by_name[e.name] = e;
-                       entity_by_id[e.code] = e; // operator[] assignment overwrites: if two entries share a code, the later one in array order is kept
-               }
-       }
-
-       auto by_name(std::string_view name) -> const html_entity_def* { // Returns the stored definition for `name`, or nullptr when absent. NOTE(review): not declared const although it only calls find().
-               auto it = entity_by_name.find(name);
-
-               if (it != entity_by_name.end()) {
-                       return &(it->second); // pointer into the map's own storage
-               }
-
-               return nullptr;
-       }
-
-       auto by_id(tag_id_t id) -> const html_entity_def* { // Returns the stored definition for code `id`, or nullptr when absent. NOTE(review): parameter is tag_id_t but the map key is unsigned filled from e.code; looks like the wrong type, confirm.
-               auto it = entity_by_id.find(id);
-               if (it != entity_by_id.end()) {
-                       return &(it->second); // pointer into the map's own storage
-               }
-
-               return nullptr;
-       }
-};
+std::size_t decode_html_entitles_inplace (char *s, std::size_t len); // Takes a mutable buffer `s` and its length `len`. NOTE(review): presumably decodes HTML entities in place and returns the resulting length; confirm against the definition. "entitles" is the declared spelling.
 
 }
 
index 8cd6617d8a07ebdd0485cd1c4323539715c7b911..ef3d8d99ed9e1b6e3517462ba5d7fa4d371fd526 100644 (file)
@@ -192,7 +192,6 @@ public:
        }
 };
 
-
 }
 
 #endif //RSPAMD_HTML_TAG_DEFS_HXX