source.dussan.org Git - rspamd.git/commitdiff
[Minor] Further fixes to the html tags content methods
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Thu, 29 Jul 2021 16:43:49 +0000 (17:43 +0100)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Thu, 29 Jul 2021 16:43:49 +0000 (17:43 +0100)
src/libserver/html/html.cxx
src/libserver/html/html_tag.hxx
src/lua/lua_html.cxx

index 53fe815ddfea4784b32a2b954e545b0a98271734..fd0bfa495a559a31ccfc4446194ccee99a9b18f1 100644 (file)
@@ -1976,7 +1976,8 @@ html_process_input(rspamd_mempool_t *pool,
                }
                break;
        case tags_limit_overflow:
-               html_append_parsed(hc, {c, (std::size_t) (end - c)}, false, end - start);
+               html_append_parsed(hc, {c, (std::size_t) (end - c)},
+                               false, end - start);
                break;
        default:
                /* Do nothing */
index 5cd22d77757ecfccf7231190dcaf4a4345279847..357e11bfb0d939df6367ec5890c1f56bf51202b6 100644 (file)
@@ -137,6 +137,20 @@ struct html_tag {
 
                return 0;
        }
+
+       constexpr auto get_content(std::string_view parsed) const -> std::string_view { /* View of this tag's content inside `parsed`, clamped to the bounds of `parsed` */
+               const auto clen = get_content_length(); /* Content length as reported by the tag itself */
+               if (content_offset < parsed.size()) { /* The content must start inside `parsed` */
+                       if (parsed.size() - content_offset >= clen) { /* Enough bytes remain after the offset for the whole content */
+                               return parsed.substr(content_offset, clen);
+                       }
+                       else { /* Fewer than `clen` bytes remain: return only what `parsed` actually holds */
+                               return parsed.substr(content_offset, parsed.size() - content_offset);
+                       }
+               }
+
+               return std::string_view{}; /* Offset is at or past the end of `parsed`: empty view */
+       }
 };
 
 static_assert(CM_USER_SHIFT + 7 < sizeof(html_tag::flags) * NBBY);
index 2b0f63c02c05cc3e174bca9a4099a40cb55f3214..d20de03c0af92b28a8c2dff8267174fb338ae710 100644 (file)
@@ -448,8 +448,9 @@ lua_html_foreach_tag (lua_State *L)
                                auto *ltag = static_cast<lua_html_tag *>(lua_newuserdata(L, sizeof(lua_html_tag)));
                                ltag->tag = tag;
                                ltag->html = hc;
+                               auto ct = ltag->tag->get_content(hc->parsed);
                                rspamd_lua_setclass (L, "rspamd{html_tag}", -1);
-                               lua_pushinteger (L, tag->get_content_length());
+                               lua_pushinteger (L, ct.size());
 
                                /* Leaf flag */
                                if (tag->children.empty()) {
@@ -579,17 +580,16 @@ lua_html_tag_get_content (lua_State *L)
        struct rspamd_lua_text *t;
 
        if (ltag) {
-               auto clen = ltag->tag->get_content_length();
-               if (ltag->html && clen && ltag->html->parsed.size() > ltag->tag->content_offset) {
-                       if (ltag->html->parsed.size() - ltag->tag->content_offset < clen) {
-                               clen = ltag->html->parsed.size() - ltag->tag->content_offset;
+
+               if (ltag->html) {
+                       auto ct = ltag->tag->get_content(ltag->html->parsed);
+                       if (ct.size() > 0) {
+                               t = static_cast<rspamd_lua_text *>(lua_newuserdata(L, sizeof(*t)));
+                               rspamd_lua_setclass(L, "rspamd{text}", -1);
+                               t->start = ct.data();
+                               t->len = ct.size();
+                               t->flags = 0;
                        }
-                       t = static_cast<rspamd_lua_text *>(lua_newuserdata(L, sizeof(*t)));
-                       rspamd_lua_setclass (L, "rspamd{text}", -1);
-                       t->start = reinterpret_cast<const char *>(ltag->html->parsed.data()) +
-                                       ltag->tag->content_offset;
-                       t->len = clen;
-                       t->flags = 0;
                }
                else {
                        lua_pushnil (L);
@@ -609,7 +609,13 @@ lua_html_tag_get_content_length (lua_State *L)
        struct lua_html_tag *ltag = lua_check_html_tag (L, 1);
 
        if (ltag) {
-               lua_pushinteger (L, ltag->tag->closing.start - ltag->tag->content_offset);
+               if (ltag->html) {
+                       auto ct = ltag->tag->get_content(ltag->html->parsed);
+                       lua_pushinteger (L, ct.size());
+               }
+               else {
+                       lua_pushinteger (L, ltag->tag->get_content_length());
+               }
        }
        else {
                return luaL_error (L, "invalid arguments");