From: Vsevolod Stakhov Date: Thu, 29 Jul 2021 16:43:49 +0000 (+0100) Subject: [Minor] Further fixes to the html tags content methods X-Git-Tag: 3.0~75 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=b369727b44bf851981ca88b764e9429428154b72;p=rspamd.git [Minor] Further fixes to the html tags content methods --- diff --git a/src/libserver/html/html.cxx b/src/libserver/html/html.cxx index 53fe815dd..fd0bfa495 100644 --- a/src/libserver/html/html.cxx +++ b/src/libserver/html/html.cxx @@ -1976,7 +1976,8 @@ html_process_input(rspamd_mempool_t *pool, } break; case tags_limit_overflow: - html_append_parsed(hc, {c, (std::size_t) (end - c)}, false, end - start); + html_append_parsed(hc, {c, (std::size_t) (end - c)}, + false, end - start); break; default: /* Do nothing */ diff --git a/src/libserver/html/html_tag.hxx b/src/libserver/html/html_tag.hxx index 5cd22d777..357e11bfb 100644 --- a/src/libserver/html/html_tag.hxx +++ b/src/libserver/html/html_tag.hxx @@ -137,6 +137,20 @@ struct html_tag { return 0; } + + constexpr auto get_content(std::string_view parsed) const -> std::string_view { + const auto clen = get_content_length(); + if (content_offset < parsed.size()) { + if (parsed.size() - content_offset >= clen) { + return parsed.substr(content_offset, clen); + } + else { + return parsed.substr(content_offset, parsed.size() - content_offset); + } + } + + return std::string_view{}; + } }; static_assert(CM_USER_SHIFT + 7 < sizeof(html_tag::flags) * NBBY); diff --git a/src/lua/lua_html.cxx b/src/lua/lua_html.cxx index 2b0f63c02..d20de03c0 100644 --- a/src/lua/lua_html.cxx +++ b/src/lua/lua_html.cxx @@ -448,8 +448,9 @@ lua_html_foreach_tag (lua_State *L) auto *ltag = static_cast<lua_html_tag *>(lua_newuserdata(L, sizeof(lua_html_tag))); ltag->tag = tag; ltag->html = hc; + auto ct = ltag->tag->get_content(hc->parsed); rspamd_lua_setclass (L, "rspamd{html_tag}", -1); - lua_pushinteger (L, tag->get_content_length()); + lua_pushinteger (L, ct.size()); /* Leaf flag */ if 
(tag->children.empty()) { @@ -579,17 +580,16 @@ lua_html_tag_get_content (lua_State *L) struct rspamd_lua_text *t; if (ltag) { - auto clen = ltag->tag->get_content_length(); - if (ltag->html && clen && ltag->html->parsed.size() > ltag->tag->content_offset) { - if (ltag->html->parsed.size() - ltag->tag->content_offset < clen) { - clen = ltag->html->parsed.size() - ltag->tag->content_offset; + + if (ltag->html) { + auto ct = ltag->tag->get_content(ltag->html->parsed); + if (ct.size() > 0) { + t = static_cast<rspamd_lua_text *>(lua_newuserdata(L, sizeof(*t))); + rspamd_lua_setclass(L, "rspamd{text}", -1); + t->start = ct.data(); + t->len = ct.size(); + t->flags = 0; } - t = static_cast<rspamd_lua_text *>(lua_newuserdata(L, sizeof(*t))); - rspamd_lua_setclass (L, "rspamd{text}", -1); - t->start = reinterpret_cast<const char *>(ltag->html->parsed.data()) + - ltag->tag->content_offset; - t->len = clen; - t->flags = 0; } else { lua_pushnil (L); @@ -609,7 +609,13 @@ lua_html_tag_get_content_length (lua_State *L) struct lua_html_tag *ltag = lua_check_html_tag (L, 1); if (ltag) { - lua_pushinteger (L, ltag->tag->closing.start - ltag->tag->content_offset); + if (ltag->html) { + auto ct = ltag->tag->get_content(ltag->html->parsed); + lua_pushinteger (L, ct.size()); + } + else { + lua_pushinteger (L, ltag->tag->get_content_length()); + } } else { return luaL_error (L, "invalid arguments");