summaryrefslogtreecommitdiffstats
path: root/src/lua
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@highsecure.ru>2016-08-05 11:11:27 +0100
committerVsevolod Stakhov <vsevolod@highsecure.ru>2016-08-05 11:11:27 +0100
commit640c6f0f2df677c63bf602c03934394516c90121 (patch)
treec0d2ee781de988ba3da43d9a8a5670fe19726e72 /src/lua
parent832e08e53f2364bda26afc6498ce968874d0fb33 (diff)
downloadrspamd-640c6f0f2df677c63bf602c03934394516c90121.tar.gz
rspamd-640c6f0f2df677c63bf602c03934394516c90121.zip
[Feature] Add new methods to lua_html to access HTML tags
Diffstat (limited to 'src/lua')
-rw-r--r--src/lua/lua_html.c207
1 files changed, 202 insertions, 5 deletions
diff --git a/src/lua/lua_html.c b/src/lua/lua_html.c
index b23530d6e..5e9b8df2b 100644
--- a/src/lua/lua_html.c
+++ b/src/lua/lua_html.c
@@ -85,7 +85,7 @@ LUA_FUNCTION_DEF (html, get_images);
/***
* @method html:get_blocks()
- * Retruns a table of html blocks. Each block provides the following data:
+ * Returns a table of html blocks. Each block provides the following data:
*
* `tag` - corresponding tag
* `color` - a triplet (r g b) for font color
@@ -95,11 +95,27 @@ LUA_FUNCTION_DEF (html, get_images);
*/
LUA_FUNCTION_DEF (html, get_blocks);
+/***
+ * @method html:foreach_tag(tagname, callback)
+ * Processes HTML tree calling the specified callback for each tag of the specified
+ * type.
+ *
+ * Callback is called with the following attributes:
+ *
+ * - `tag`: html tag structure
+ * - `content_length`: length of content within a tag
+ *
+ * Callback function should return `true` to **stop** processing and `false` to continue
+ * @return nothing
+ */
+LUA_FUNCTION_DEF (html, foreach_tag);
+
static const struct luaL_reg htmllib_m[] = {
LUA_INTERFACE_DEF (html, has_tag),
LUA_INTERFACE_DEF (html, has_property),
LUA_INTERFACE_DEF (html, get_images),
LUA_INTERFACE_DEF (html, get_blocks),
+ LUA_INTERFACE_DEF (html, foreach_tag),
{"__tostring", rspamd_lua_class_tostring},
{NULL, NULL}
};
@@ -123,10 +139,38 @@ LUA_FUNCTION_DEF (html_tag, get_extra);
*/
LUA_FUNCTION_DEF (html_tag, get_parent);
+/***
+ * @method html_tag:get_flags()
+ * Returns flags a specified tag:
+ *
+ * - `closed`: tag is properly closed
+ * - `closing`: tag is a closing tag
+ * - `broken`: tag is somehow broken
+ * - `unbalanced`: tag is unbalanced
+ * - `xml`: tag is xml tag
+ * @return {table} table of flags
+ */
+LUA_FUNCTION_DEF (html_tag, get_flags);
+/***
+ * @method html_tag:get_content()
+ * Returns content of tag (approximate for some cases)
+ * @return {rspamd_text} rspamd text with tag's content
+ */
+LUA_FUNCTION_DEF (html_tag, get_content);
+/***
+ * @method html_tag:get_content_length()
+ * Returns length of a tag's content
+ * @return {number} size of content enclosed within a tag
+ */
+LUA_FUNCTION_DEF (html_tag, get_content_length);
+
static const struct luaL_reg taglib_m[] = {
LUA_INTERFACE_DEF (html_tag, get_type),
LUA_INTERFACE_DEF (html_tag, get_extra),
LUA_INTERFACE_DEF (html_tag, get_parent),
+ LUA_INTERFACE_DEF (html_tag, get_flags),
+ LUA_INTERFACE_DEF (html_tag, get_content),
+ LUA_INTERFACE_DEF (html_tag, get_content_length),
{"__tostring", rspamd_lua_class_tostring},
{NULL, NULL}
};
@@ -330,12 +374,88 @@ lua_html_get_blocks (lua_State *L)
}
}
else {
- lua_pushnil (L);
+ return luaL_error (L, "invalid arguments");
}
return 1;
}
+struct lua_html_traverse_ud {
+ lua_State *L;
+ gint cbref;
+ gint tag_id;
+};
+
+static gboolean
+lua_html_node_foreach_cb (GNode *n, gpointer d)
+{
+ struct lua_html_traverse_ud *ud = d;
+ struct html_tag *tag = n->data, **ptag;
+
+ if (tag && (ud->tag_id == -1 || ud->tag_id == tag->id)) {
+
+ lua_rawgeti (ud->L, LUA_REGISTRYINDEX, ud->cbref);
+
+ ptag = lua_newuserdata (ud->L, sizeof (*ptag));
+ *ptag = tag;
+ rspamd_lua_setclass (ud->L, "rspamd{html_tag}", -1);
+ lua_pushnumber (ud->L, tag->content_length);
+
+ if (lua_pcall (ud->L, 2, 1, 0) != 0) {
+ lua_settop (ud->L, 0);
+ return TRUE;
+ }
+
+ if (lua_toboolean (ud->L, -1)) {
+ lua_settop (ud->L, 0);
+ return TRUE;
+ }
+
+ lua_settop (ud->L, 0);
+ }
+
+ return FALSE;
+}
+
+static gint
+lua_html_foreach_tag (lua_State *L)
+{
+ struct html_content *hc = lua_check_html (L, 1);
+ struct lua_html_traverse_ud ud;
+ const gchar *tagname;
+ gint id;
+
+ tagname = luaL_checkstring (L, 2);
+
+ if (hc && tagname && lua_isfunction (L, 3)) {
+ if (strcmp (tagname, "any") == 0) {
+ id = -1;
+ }
+ else {
+ id = rspamd_html_tag_by_name (tagname);
+
+ if (id == -1) {
+ return luaL_error (L, "invalid tagname: %s", tagname);
+ }
+ }
+
+ lua_pushvalue (L, 3);
+ ud.cbref = luaL_ref (L, LUA_REGISTRYINDEX);
+ ud.L = L;
+ ud.tag_id = id;
+
+ g_node_traverse (hc->html_tags, G_PRE_ORDER, G_TRAVERSE_ALL, -1,
+ lua_html_node_foreach_cb, &ud);
+
+ luaL_unref (L, LUA_REGISTRYINDEX, ud.cbref);
+ }
+ else {
+ return luaL_error (L, "invalid arguments");
+ }
+
+ return 0;
+}
+
static gint
lua_html_tag_get_type (lua_State *L)
{
@@ -353,7 +473,7 @@ lua_html_tag_get_type (lua_State *L)
}
}
else {
- lua_error (L);
+ return luaL_error (L, "invalid arguments");
}
return 1;
@@ -375,7 +495,84 @@ lua_html_tag_get_parent (lua_State *L)
}
}
else {
- lua_error (L);
+ return luaL_error (L, "invalid arguments");
+ }
+
+ return 1;
+}
+
+static gint
+lua_html_tag_get_flags (lua_State *L)
+{
+ struct html_tag *tag = lua_check_html_tag (L, 1);
+ gint i = 1;
+
+ if (tag) {
+ /* Push flags */
+ lua_createtable (L, 4, 0);
+ if (tag->flags & FL_CLOSING) {
+ lua_pushstring (L, "closing");
+ lua_rawseti (L, -2, i++);
+ }
+ if (tag->flags & FL_CLOSED) {
+ lua_pushstring (L, "closed");
+ lua_rawseti (L, -2, i++);
+ }
+ if (tag->flags & FL_BROKEN) {
+ lua_pushstring (L, "broken");
+ lua_rawseti (L, -2, i++);
+ }
+ if (tag->flags & FL_XML) {
+ lua_pushstring (L, "xml");
+ lua_rawseti (L, -2, i++);
+ }
+ if (tag->flags & RSPAMD_HTML_FLAG_UNBALANCED) {
+ lua_pushstring (L, "unbalanced");
+ lua_rawseti (L, -2, i++);
+ }
+ }
+ else {
+ return luaL_error (L, "invalid arguments");
+ }
+
+ return 1;
+}
+
+static gint
+lua_html_tag_get_content (lua_State *L)
+{
+ struct html_tag *tag = lua_check_html_tag (L, 1);
+ struct rspamd_lua_text *t;
+
+ if (tag) {
+ if (tag->content && tag->content_length) {
+ t = lua_newuserdata (L, sizeof (*t));
+ rspamd_lua_setclass (L, "rspamd{text}", -1);
+ t->start = tag->content;
+ t->len = tag->content_length;
+ t->own = FALSE;
+ }
+ else {
+ lua_pushnil (L);
+ }
+ }
+ else {
+ return luaL_error (L, "invalid arguments");
+ }
+
+ return 1;
+}
+
+static gint
+lua_html_tag_get_content_length (lua_State *L)
+{
+ struct html_tag *tag = lua_check_html_tag (L, 1);
+
+ if (tag) {
+ lua_pushnumber (L, tag->content_length);
+ }
+ else {
+ return luaL_error (L, "invalid arguments");
}
return 1;
@@ -410,7 +607,7 @@ lua_html_tag_get_extra (lua_State *L)
}
}
else {
- lua_error (L);
+ return luaL_error (L, "invalid arguments");
}
return 1;