return FALSE;
}
-gboolean
-rspamd_html_tag_seen (struct html_content *hc, const gchar *tagname)
+gint
+rspamd_html_tag_by_name (const gchar *name) /* Looks up a tag id by name in tag_defs; returns -1 when nothing matches */
{
struct html_tag tag;
struct html_tag_def *found;
- g_assert (hc != NULL);
- g_assert (hc->tags_seen != NULL);
-
- tag.name.start = tagname;
- tag.name.len = strlen (tagname);
+ tag.name.start = name; /* search key: only the name component of the tag is filled in */
+ tag.name.len = strlen (name); /* name must be a non-NULL, NUL-terminated string */
found = bsearch (&tag, tag_defs, G_N_ELEMENTS (tag_defs),
sizeof (tag_defs[0]), tag_find);
if (found) {
- return isset (hc->tags_seen, found->id);
+ return found->id; /* id stored in the matching tag_defs entry */
+ }
+
+ return -1; /* bsearch found no entry for this name */
+}
+
+gboolean
+rspamd_html_tag_seen (struct html_content *hc, const gchar *tagname)
+{
+ gint id;
+
+ g_assert (hc != NULL);
+ g_assert (hc->tags_seen != NULL);
+
+ id = rspamd_html_tag_by_name (tagname);
+
+ if (id != -1) {
+ return isset (hc->tags_seen, id);
}
return FALSE;
else {
parent = (*cur_level)->data;
- if (parent && (parent->flags & FL_IGNORE)) {
- tag->flags |= FL_IGNORE;
+ if (parent) {
+ if ((parent->flags & FL_IGNORE)) {
+ tag->flags |= FL_IGNORE;
+ }
+
+ parent->content_length += tag->content_length;
}
g_node_append (*cur_level, nnode);
guint obrace = 0, ebrace = 0;
GNode *cur_level = NULL;
gint substate = 0, len, href_offset = -1;
- struct html_tag *cur_tag = NULL;
+ struct html_tag *cur_tag = NULL, *content_tag = NULL;
struct rspamd_url *url = NULL, *turl;
struct rspamd_process_exception *ex;
enum {
save_space = FALSE;
}
}
-
- if (cur_tag) {
- cur_tag->content_length ++;
- }
}
else {
if (c != p) {
}
g_byte_array_append (dest, c, len);
+
+ if (content_tag) {
+ content_tag->content_length = len;
+ content_tag->content = c;
+ content_tag = NULL;
+ }
}
state = tag_begin;
setbit (hc->tags_seen, cur_tag->id);
}
+ if (!(cur_tag->flags & (FL_CLOSED|FL_CLOSING))) {
+ content_tag = cur_tag;
+ }
+
/* Handle newlines */
if (cur_tag->id == Tag_BR || cur_tag->id == Tag_HR) {
if (dest->len > 0 && dest->data[dest->len - 1] != '\n') {
gchar *class;
};
+/*
+ * Public tag flags: single-bit values (bits 23..28) that are tested and set
+ * on the `flags` member of struct html_tag.
+ */
+/* XML tag */
+#define FL_XML (1 << 23)
+/* Closing tag */
+#define FL_CLOSING (1 << 24)
+/* Fully closed tag (e.g. <a attrs />) */
+#define FL_CLOSED (1 << 25)
+#define FL_BROKEN (1 << 26) /* NOTE(review): presumably marks a malformed tag - confirm */
+#define FL_IGNORE (1 << 27) /* Copied from the parent tag to a child when the child is added to the tree */
+#define FL_BLOCK (1 << 28) /* NOTE(review): presumably marks a block-level element - confirm */
+
struct html_tag {
gint id;
gint flags;
gsize content_length;
+ const gchar *content;
struct html_tag_component name;
GQueue *params;
gpointer extra; /** Additional data associated with tag (e.g. image) */
*/
const gchar* rspamd_html_tag_by_id (gint id);
+/**
+ * Returns HTML tag id by name
+ * @param name NUL-terminated tag name to look up (must not be NULL, its
+ * length is taken with strlen)
+ * @return id of the matching tag definition or -1 if the name is not found
+ * in the table of known tags
+ */
+gint rspamd_html_tag_by_name (const gchar *name);
+
/**
* Extract URL from HTML tag component and sets component elements if needed
* @param pool
#define CM_OMITST (1 << 21)
/* Unique elements */
#define CM_UNIQUE (1 << 22)
-/* XML tag */
-#define FL_XML (1 << 23)
-/* Closing tag */
-#define FL_CLOSING (1 << 24)
-/* Fully closed tag (e.g. <a attrs />) */
-#define FL_CLOSED (1 << 25)
-#define FL_BROKEN (1 << 26)
-#define FL_IGNORE (1 << 27)
-#define FL_BLOCK (1 << 28)
+
#endif /* SRC_LIBSERVER_HTML_TAGS_H_ */