summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
author: Vsevolod Stakhov <vsevolod@highsecure.ru> 2021-07-29 17:43:49 +0100
committer: Vsevolod Stakhov <vsevolod@highsecure.ru> 2021-07-29 17:43:49 +0100
commit: b369727b44bf851981ca88b764e9429428154b72 (patch)
tree: 4e0662bac0d3d0a518e26e88183e99cfcbc0dd7c /src
parent: 47e8abad8456fd3cd46d98241c6bc9ca71cf9368 (diff)
download: rspamd-b369727b44bf851981ca88b764e9429428154b72.tar.gz
          rspamd-b369727b44bf851981ca88b764e9429428154b72.zip
[Minor] Further fixes to the html tags content methods
Diffstat (limited to 'src')
-rw-r--r-- src/libserver/html/html.cxx | 3
-rw-r--r-- src/libserver/html/html_tag.hxx | 14
-rw-r--r-- src/lua/lua_html.cxx | 30
3 files changed, 34 insertions, 13 deletions
diff --git a/src/libserver/html/html.cxx b/src/libserver/html/html.cxx
index 53fe815dd..fd0bfa495 100644
--- a/src/libserver/html/html.cxx
+++ b/src/libserver/html/html.cxx
@@ -1976,7 +1976,8 @@ html_process_input(rspamd_mempool_t *pool,
}
break;
case tags_limit_overflow:
- html_append_parsed(hc, {c, (std::size_t) (end - c)}, false, end - start);
+ html_append_parsed(hc, {c, (std::size_t) (end - c)},
+ false, end - start);
break;
default:
/* Do nothing */
diff --git a/src/libserver/html/html_tag.hxx b/src/libserver/html/html_tag.hxx
index 5cd22d777..357e11bfb 100644
--- a/src/libserver/html/html_tag.hxx
+++ b/src/libserver/html/html_tag.hxx
@@ -137,6 +137,20 @@ struct html_tag {
return 0;
}
+ /**
+  * Returns a view of this tag's content taken from `parsed`.
+  * The view starts at `content_offset` and is `get_content_length()`
+  * characters long; if `parsed` ends before that, the view is shortened
+  * to the part of `parsed` that remains after `content_offset`.
+  * When `content_offset` is not strictly less than `parsed.size()`,
+  * an empty view is returned.
+  * @param parsed text that `content_offset` is applied to
+  * @return sub-view of `parsed` (no copy is made), possibly empty
+  */
+ constexpr auto get_content(std::string_view parsed) const -> std::string_view {
+ const auto clen = get_content_length();
+ if (content_offset < parsed.size()) { // offset must point inside `parsed`
+ if (parsed.size() - content_offset >= clen) { // the whole content fits
+ return parsed.substr(content_offset, clen);
+ }
+ else { // content length overruns `parsed`: return only the remaining tail
+ return parsed.substr(content_offset, parsed.size() - content_offset);
+ }
+ }
+
+ return std::string_view{}; // offset is at or past the end of `parsed`
+ }
};
static_assert(CM_USER_SHIFT + 7 < sizeof(html_tag::flags) * NBBY);
diff --git a/src/lua/lua_html.cxx b/src/lua/lua_html.cxx
index 2b0f63c02..d20de03c0 100644
--- a/src/lua/lua_html.cxx
+++ b/src/lua/lua_html.cxx
@@ -448,8 +448,9 @@ lua_html_foreach_tag (lua_State *L)
auto *ltag = static_cast<lua_html_tag *>(lua_newuserdata(L, sizeof(lua_html_tag)));
ltag->tag = tag;
ltag->html = hc;
+ auto ct = ltag->tag->get_content(hc->parsed);
rspamd_lua_setclass (L, "rspamd{html_tag}", -1);
- lua_pushinteger (L, tag->get_content_length());
+ lua_pushinteger (L, ct.size());
/* Leaf flag */
if (tag->children.empty()) {
@@ -579,17 +580,16 @@ lua_html_tag_get_content (lua_State *L)
struct rspamd_lua_text *t;
if (ltag) {
- auto clen = ltag->tag->get_content_length();
- if (ltag->html && clen && ltag->html->parsed.size() > ltag->tag->content_offset) {
- if (ltag->html->parsed.size() - ltag->tag->content_offset < clen) {
- clen = ltag->html->parsed.size() - ltag->tag->content_offset;
+
+ if (ltag->html) {
+ auto ct = ltag->tag->get_content(ltag->html->parsed);
+ if (ct.size() > 0) {
+ t = static_cast<rspamd_lua_text *>(lua_newuserdata(L, sizeof(*t)));
+ rspamd_lua_setclass(L, "rspamd{text}", -1);
+ t->start = ct.data();
+ t->len = ct.size();
+ t->flags = 0;
}
- t = static_cast<rspamd_lua_text *>(lua_newuserdata(L, sizeof(*t)));
- rspamd_lua_setclass (L, "rspamd{text}", -1);
- t->start = reinterpret_cast<const char *>(ltag->html->parsed.data()) +
- ltag->tag->content_offset;
- t->len = clen;
- t->flags = 0;
}
else {
lua_pushnil (L);
@@ -609,7 +609,13 @@ lua_html_tag_get_content_length (lua_State *L)
struct lua_html_tag *ltag = lua_check_html_tag (L, 1);
if (ltag) {
- lua_pushinteger (L, ltag->tag->closing.start - ltag->tag->content_offset);
+ if (ltag->html) {
+ auto ct = ltag->tag->get_content(ltag->html->parsed);
+ lua_pushinteger (L, ct.size());
+ }
+ else {
+ lua_pushinteger (L, ltag->tag->get_content_length());
+ }
}
else {
return luaL_error (L, "invalid arguments");