aboutsummaryrefslogtreecommitdiffstats
path: root/src/lua/lua_parsers.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/lua/lua_parsers.c')
-rw-r--r--src/lua/lua_parsers.c67
1 files changed, 66 insertions, 1 deletions
diff --git a/src/lua/lua_parsers.c b/src/lua/lua_parsers.c
index 39e1b0317..eb7fa6bf5 100644
--- a/src/lua/lua_parsers.c
+++ b/src/lua/lua_parsers.c
@@ -46,6 +46,14 @@
*/
/***
+ * @function parsers.parse_html_content(input, mempool)
+ * Parses HTML and returns the HTML content object for structure analysis
+ * @param {string|text} input input HTML
+ * @param {rspamd_mempool} mempool memory pool for HTML content management
+ * @return {html_content} HTML content object with tag structure
+ */
+LUA_FUNCTION_DEF(parsers, parse_html_content);
+/***
* @function parsers.parse_mail_address(str, [pool])
* Parses email address and returns a table of tables in the following format:
*
@@ -93,6 +101,7 @@
static const struct luaL_reg parserslib_f[] = {
LUA_INTERFACE_DEF(parsers, tokenize_text),
LUA_INTERFACE_DEF(parsers, parse_html),
+ LUA_INTERFACE_DEF(parsers, parse_html_content),
LUA_INTERFACE_DEF(parsers, parse_mail_address),
LUA_INTERFACE_DEF(parsers, parse_content_type),
LUA_INTERFACE_DEF(parsers, parse_smtp_date),
@@ -242,6 +251,62 @@ int lua_parsers_parse_html(lua_State *L)
return 1;
}
+/*
+ * Lua binding for parsers.parse_html_content(input, mempool).
+ *
+ * Stack argument 1 is the HTML input, either a Lua string or a text
+ * userdata; any other type is treated as "no input" and yields nil.
+ * Stack argument 2 must be a mempool userdata, otherwise a Lua error
+ * is raised.
+ *
+ * The input bytes are copied into a temporary GByteArray, handed to
+ * rspamd_html_process_part() together with the pool, and the returned
+ * pointer (when non-NULL) is wrapped into a userdata tagged with
+ * rspamd_html_classname. Exactly one value (userdata or nil) is pushed.
+ */
+static int lua_parsers_parse_html_content(lua_State *L)
+{
+	LUA_TRACE_POINT;
+	const char *html = NULL;
+	gsize html_len;
+	rspamd_mempool_t *pool;
+
+	/* Resolve the input buffer; unsupported types leave html == NULL */
+	switch (lua_type(L, 1)) {
+	case LUA_TUSERDATA: {
+		struct rspamd_lua_text *txt = lua_check_text(L, 1);
+
+		if (txt != NULL) {
+			html = txt->start;
+			html_len = txt->len;
+		}
+		break;
+	}
+	case LUA_TSTRING:
+		html = luaL_checklstring(L, 1, &html_len);
+		break;
+	default:
+		break;
+	}
+
+	/* The pool argument is mandatory regardless of the input type */
+	if (lua_type(L, 2) != LUA_TUSERDATA) {
+		return luaL_error(L, "invalid arguments: mempool expected as second argument");
+	}
+
+	pool = rspamd_lua_check_mempool(L, 2);
+	if (pool == NULL) {
+		return luaL_error(L, "invalid mempool argument");
+	}
+
+	/* Nothing to parse: report nil to the caller */
+	if (html == NULL) {
+		lua_pushnil(L);
+		return 1;
+	}
+
+	/* rspamd_html_process_part() takes a GByteArray, so copy the input into one */
+	GByteArray *buf = g_byte_array_sized_new(html_len);
+	g_byte_array_append(buf, html, html_len);
+
+	void *content = rspamd_html_process_part(pool, buf);
+
+	if (content == NULL) {
+		lua_pushnil(L);
+	}
+	else {
+		/* The userdata stores only the pointer to the HTML content */
+		void **slot = lua_newuserdata(L, sizeof(void *));
+
+		*slot = content;
+		rspamd_lua_setclass(L, rspamd_html_classname, -1);
+	}
+
+	/* The temporary copy is released once the result has been pushed */
+	g_byte_array_free(buf, TRUE);
+
+	return 1;
+}
+
+
int lua_parsers_parse_mail_address(lua_State *L)
{
LUA_TRACE_POINT;
@@ -409,4 +474,4 @@ lua_load_parsers(lua_State *L)
void luaopen_parsers(lua_State *L)
{
rspamd_lua_add_preload(L, "rspamd_parsers", lua_load_parsers);
-}
\ No newline at end of file
+}