From: Vsevolod Stakhov
Date: Tue, 29 Jun 2021 12:21:47 +0000 (+0100)
Subject: [Rework] Remove tag name string
X-Git-Tag: 3.0~233
X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=ad9f3e26b430e9a1d39fc2109b22e580e3e91a25;p=rspamd.git

[Rework] Remove tag name string
---

diff --git a/src/libserver/html/html.cxx b/src/libserver/html/html.cxx
index c5d35105c..a55266b19 100644
--- a/src/libserver/html/html.cxx
+++ b/src/libserver/html/html.cxx
@@ -369,12 +369,14 @@ find_tag_component_name(rspamd_mempool_t *pool,
 struct tag_content_parser_state {
 	int cur_state = 0;
 	const char *saved_p = nullptr;
+	const char *tag_name_start = nullptr;
 	std::optional cur_component;
 
 	void reset()
 	{
 		cur_state = 0;
 		saved_p = nullptr;
+		tag_name_start = nullptr;
 		cur_component = std::nullopt;
 	}
 };
@@ -441,22 +443,22 @@ html_parse_tag_content(rspamd_mempool_t *pool,
 		}
 		else if (g_ascii_isalpha (*in)) {
 			state = parse_name;
-			tag->name = std::string_view{in, 0};
+			parser_env.tag_name_start = in;
 		}
 		break;
 
 	case parse_name:
-		if (g_ascii_isspace (*in) || *in == '>' || *in == '/') {
-			const auto *start = tag->name.begin();
+		if ((g_ascii_isspace (*in) || *in == '>' || *in == '/') && parser_env.tag_name_start) {
+			const auto *start = parser_env.tag_name_start;
 			g_assert (in >= start);
 
 			if (*in == '/') {
 				tag->flags |= FL_CLOSED;
 			}
 
-			tag->name = std::string_view{start, (std::size_t)(in - start)};
+			const auto tag_name_len = in - start;
 
-			if (tag->name.empty()) {
+			if (tag_name_len== 0) {
 				hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS;
 				tag->id = -1;
 				tag->flags |= FL_BROKEN;
@@ -466,14 +468,13 @@ html_parse_tag_content(rspamd_mempool_t *pool,
 				/*
 				 * Copy tag name to the temporary buffer for modifications
 				 */
-				auto *s = rspamd_mempool_alloc_buffer(pool, tag->name.size() + 1);
-				rspamd_strlcpy(s, tag->name.data(), tag->name.size() + 1);
+				auto *s = rspamd_mempool_alloc_buffer(pool, tag_name_len + 1);
+				rspamd_strlcpy(s, parser_env.tag_name_start, tag_name_len + 1);
 				auto nsize = rspamd_html_decode_entitles_inplace(s,
-						tag->name.size());
+						tag_name_len);
 				nsize = rspamd_str_lc_utf8(s, nsize);
-				tag->name = std::string_view{s, nsize};
 
-				const auto *tag_def = rspamd::html::html_tags_defs.by_name(tag->name);
+				const auto *tag_def = rspamd::html::html_tags_defs.by_name({s, nsize});
 
 				if (tag_def == nullptr) {
 					hc->flags |= RSPAMD_HTML_FLAG_UNKNOWN_ELEMENTS;
@@ -1847,7 +1848,8 @@ html_debug_structure(const html_content &hc) -> std::string
 	if (hc.root_tag) {
 		auto rec_functor = [&](const html_tag *t, int level, auto rec_functor) -> void {
 			std::string pluses(level, '+');
-			output += fmt::format("{}{};", pluses, t->name);
+			output += fmt::format("{}{};", pluses,
+					html_tags_defs.name_by_id_safe(t->id));
 			for (const auto *cld : t->children) {
 				rec_functor(cld, level + 1, rec_functor);
 			}
@@ -2066,12 +2068,13 @@ const gchar *
 rspamd_html_tag_name(void *p, gsize *len)
 {
 	auto *tag = reinterpret_cast(p);
+	auto tname = rspamd::html::html_tags_defs.name_by_id_safe(tag->id);
 
 	if (len) {
-		*len = tag->name.size();
+		*len = tname.size();
 	}
 
-	return tag->name.data();
+	return tname.data();
 }
 
 struct html_image*
diff --git a/src/libserver/html/html_tag.hxx b/src/libserver/html/html_tag.hxx
index 36110c8c7..a79195a5f 100644
--- a/src/libserver/html/html_tag.hxx
+++ b/src/libserver/html/html_tag.hxx
@@ -78,7 +78,6 @@ struct html_tag {
 
 	std::uint32_t flags = 0;
 	std::int32_t id = -1;
-	std::string_view name;
 
 	std::vector parameters;
 	html_tag_extra_t extra;
diff --git a/src/libserver/html/html_tag_defs.hxx b/src/libserver/html/html_tag_defs.hxx
index 36d3ba4ed..fe08d081b 100644
--- a/src/libserver/html/html_tag_defs.hxx
+++ b/src/libserver/html/html_tag_defs.hxx
@@ -190,6 +190,15 @@ public:
 
 		return nullptr;
 	}
+
+	auto name_by_id_safe(int id) const -> std::string_view {
+		auto it = tag_by_id.find(static_cast(id));
+		if (it != tag_by_id.end()) {
+			return it->second.name;
+		}
+
+		return "unknown";
+	}
 };
 
 }