From 7d3e0fc85e29b504e2fcfa83140708d7fea80eca Mon Sep 17 00:00:00 2001
From: Vsevolod Stakhov
Date: Thu, 29 Jul 2021 10:32:58 +0100
Subject: [PATCH] [Minor] Some fixes in content extraction for html tags

---
Notes (placed after the three-dash line, so not part of the commit message):
- html_tag::get_content_length() returns 0 when any of the FL_IGNORE or
  CM_HEAD bits is set in `flags`, or when closing.start is not greater than
  content_offset; otherwise it returns closing.start - content_offset.
- lua_html_foreach_tag and lua_html_tag_get_content now call that helper
  instead of subtracting the two offsets directly.
- lua_html_foreach_tag pushes rspamd_lua_traceback before the callback,
  passes its stack index to lua_pcall as the message handler, and resets the
  stack with lua_settop(L, err_idx - 1) in all three outcomes (call failed,
  callback returned true, callback returned false).
- NOTE(review): this copy had lost its line breaks, indentation and every
  <...> span (cast target types, author address). Line structure, tab depth
  and blank context lines were rebuilt from the hunk headers, and the cast
  types from the surrounding declarations; confirm against the upstream
  commit before applying.

 src/libserver/html/html_tag.hxx | 11 +++++++++++
 src/lua/lua_html.cxx            | 14 ++++++++------
 2 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/src/libserver/html/html_tag.hxx b/src/libserver/html/html_tag.hxx
index d7e80f41b..5cd22d777 100644
--- a/src/libserver/html/html_tag.hxx
+++ b/src/libserver/html/html_tag.hxx
@@ -126,6 +126,17 @@ struct html_tag {
 		children.clear();
 		closing.clear();
 	}
+
+	constexpr auto get_content_length() const -> std::size_t {
+		if (flags & (FL_IGNORE|CM_HEAD)) {
+			return 0;
+		}
+		if (closing.start > content_offset) {
+			return closing.start - content_offset;
+		}
+
+		return 0;
+	}
 };
 
 static_assert(CM_USER_SHIFT + 7 < sizeof(html_tag::flags) * NBBY);
diff --git a/src/lua/lua_html.cxx b/src/lua/lua_html.cxx
index 250203d6e..9a562d2fa 100644
--- a/src/lua/lua_html.cxx
+++ b/src/lua/lua_html.cxx
@@ -441,13 +441,15 @@ lua_html_foreach_tag (lua_State *L)
 	if (hc && (any || !tags.empty()) && lua_isfunction (L, 3)) {
 		hc->traverse_all_tags([&](const rspamd::html::html_tag *tag) -> bool {
 			if (tag && (any || tags.contains(tag->id))) {
+				lua_pushcfunction (L, &rspamd_lua_traceback);
+				auto err_idx = lua_gettop(L);
 				lua_pushvalue(L, 3);
 
 				auto *ltag = static_cast<lua_html_tag *>(lua_newuserdata(L, sizeof(lua_html_tag)));
 				ltag->tag = tag;
 				ltag->html = hc;
 				rspamd_lua_setclass (L, "rspamd{html_tag}", -1);
-				lua_pushinteger (L, tag->closing.start - tag->content_offset);
+				lua_pushinteger (L, tag->get_content_length());
 
 				/* Leaf flag */
 				if (tag->children.empty()) {
@@ -457,18 +459,18 @@ lua_html_foreach_tag (lua_State *L)
 					lua_pushboolean (L, false);
 				}
 
-				if (lua_pcall (L, 3, 1, 0) != 0) {
+				if (lua_pcall (L, 3, 1, err_idx) != 0) {
 					msg_err ("error in foreach_tag callback: %s", lua_tostring (L, -1));
-					lua_pop (L, 1);
+					lua_settop(L, err_idx - 1);
 					return false;
 				}
 
 				if (lua_toboolean (L, -1)) {
-					lua_pop(L, 1);
+					lua_settop(L, err_idx - 1);
 					return false;
 				}
 
-				lua_pop(L, 1);
+				lua_settop(L, err_idx - 1);
 			}
 
 			return true;
@@ -577,7 +579,7 @@ lua_html_tag_get_content (lua_State *L)
 	struct rspamd_lua_text *t;
 
 	if (ltag) {
-		auto clen = ltag->tag->closing.start - ltag->tag->content_offset;
+		auto clen = ltag->tag->get_content_length();
 		if (ltag->html && clen && ltag->html->parsed.size() >= ltag->tag->content_offset + clen) {
 			t = static_cast<rspamd_lua_text *>(lua_newuserdata(L, sizeof(*t)));
 
-- 
2.39.5