diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2021-07-29 10:32:58 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2021-07-29 10:33:31 +0100 |
commit | 7d3e0fc85e29b504e2fcfa83140708d7fea80eca (patch) | |
tree | 8b119286e566bb5611ae70b15782cb69b9a5887e | |
parent | b3e427079527a5bf958e1d3de52c686a7accc33b (diff) | |
download | rspamd-7d3e0fc85e29b504e2fcfa83140708d7fea80eca.tar.gz rspamd-7d3e0fc85e29b504e2fcfa83140708d7fea80eca.zip |
[Minor] Some fixes in content extraction for html tags
-rw-r--r-- | src/libserver/html/html_tag.hxx | 11 | ||||
-rw-r--r-- | src/lua/lua_html.cxx | 14 |
2 files changed, 19 insertions, 6 deletions
```diff
diff --git a/src/libserver/html/html_tag.hxx b/src/libserver/html/html_tag.hxx
index d7e80f41b..5cd22d777 100644
--- a/src/libserver/html/html_tag.hxx
+++ b/src/libserver/html/html_tag.hxx
@@ -126,6 +126,17 @@ struct html_tag {
 		children.clear();
 		closing.clear();
 	}
+
+	constexpr auto get_content_length() const -> std::size_t {
+		if (flags & (FL_IGNORE|CM_HEAD)) {
+			return 0;
+		}
+		if (closing.start > content_offset) {
+			return closing.start - content_offset;
+		}
+
+		return 0;
+	}
 };

 static_assert(CM_USER_SHIFT + 7 < sizeof(html_tag::flags) * NBBY);
diff --git a/src/lua/lua_html.cxx b/src/lua/lua_html.cxx
index 250203d6e..9a562d2fa 100644
--- a/src/lua/lua_html.cxx
+++ b/src/lua/lua_html.cxx
@@ -441,13 +441,15 @@ lua_html_foreach_tag (lua_State *L)
 	if (hc && (any || !tags.empty()) && lua_isfunction (L, 3)) {
 		hc->traverse_all_tags([&](const rspamd::html::html_tag *tag) -> bool {
 			if (tag && (any || tags.contains(tag->id))) {
+				lua_pushcfunction (L, &rspamd_lua_traceback);
+				auto err_idx = lua_gettop(L);
 				lua_pushvalue(L, 3);

 				auto *ltag = static_cast<lua_html_tag *>(lua_newuserdata(L, sizeof(lua_html_tag)));
 				ltag->tag = tag;
 				ltag->html = hc;
 				rspamd_lua_setclass (L, "rspamd{html_tag}", -1);
-				lua_pushinteger (L, tag->closing.start - tag->content_offset);
+				lua_pushinteger (L, tag->get_content_length());

 				/* Leaf flag */
 				if (tag->children.empty()) {
@@ -457,18 +459,18 @@ lua_html_foreach_tag (lua_State *L)
 					lua_pushboolean (L, false);
 				}

-				if (lua_pcall (L, 3, 1, 0) != 0) {
+				if (lua_pcall (L, 3, 1, err_idx) != 0) {
 					msg_err ("error in foreach_tag callback: %s", lua_tostring (L, -1));
-					lua_pop (L, 1);
+					lua_settop(L, err_idx - 1);
 					return false;
 				}

 				if (lua_toboolean (L, -1)) {
-					lua_pop(L, 1);
+					lua_settop(L, err_idx - 1);
 					return false;
 				}

-				lua_pop(L, 1);
+				lua_settop(L, err_idx - 1);
 			}

 			return true;
@@ -577,7 +579,7 @@ lua_html_tag_get_content (lua_State *L)
 	struct rspamd_lua_text *t;

 	if (ltag) {
-		auto clen = ltag->tag->closing.start - ltag->tag->content_offset;
+		auto clen = ltag->tag->get_content_length();
 		if (ltag->html && clen && ltag->html->parsed.size() >= ltag->tag->content_offset + clen) {
 			t = static_cast<rspamd_lua_text *>(lua_newuserdata(L, sizeof(*t)));
```