/*
 * Functions for simple html parsing
 */

#ifndef RSPAMD_HTML_H
#define RSPAMD_HTML_H

#include "config.h"
#include "mem_pool.h"

/*
 * HTML content flags
 */
/*
 * Each flag occupies a distinct bit, so several flags can be combined with
 * bitwise OR and tested with bitwise AND.
 * NOTE(review): presumably these are stored in `struct html_content.flags`;
 * the exact condition that sets each flag lives in the parser -- confirm there.
 */
#define RSPAMD_HTML_FLAG_BAD_START (1 << 0)
#define RSPAMD_HTML_FLAG_BAD_ELEMENTS (1 << 1)
#define RSPAMD_HTML_FLAG_XML (1 << 2)
#define RSPAMD_HTML_FLAG_UNBALANCED (1 << 3)
#define RSPAMD_HTML_FLAG_UNKNOWN_ELEMENTS (1 << 4)
#define RSPAMD_HTML_FLAG_DUPLICATE_ELEMENTS (1 << 5)

/*
 * Image flags
 */
/*
 * These reuse bit positions 0 and 1 of the HTML content flags, so the two
 * flag sets must not be mixed in the same field.
 * NOTE(review): presumably stored in `struct html_image.flags` -- confirm.
 */
#define RSPAMD_HTML_FLAG_IMAGE_EMBEDDED (1 << 0)
#define RSPAMD_HTML_FLAG_IMAGE_EXTERNAL (1 << 1)

/*
 * Discriminator for `struct html_tag_component`: tells which part of a tag
 * a component describes. Values are consecutive, starting at 0.
 * NOTE(review): apart from NAME, the members presumably correspond to the
 * HTML attributes of the same name -- confirm against the parser.
 */
enum html_component_type {
	RSPAMD_HTML_COMPONENT_NAME = 0,
	RSPAMD_HTML_COMPONENT_HREF,
	RSPAMD_HTML_COMPONENT_COLOR,
	RSPAMD_HTML_COMPONENT_STYLE,
	RSPAMD_HTML_COMPONENT_CLASS,
	RSPAMD_HTML_COMPONENT_WIDTH,
	RSPAMD_HTML_COMPONENT_HEIGHT
};

/*
 * A typed piece of a tag, described by a pointer and a length.
 * NOTE(review): presumably `start` points at `len` bytes that are not
 * necessarily NUL-terminated, and the memory is owned elsewhere (the pointer
 * is const) -- confirm against the parser.
 */
struct html_tag_component {
	enum html_component_type type; /* which part of the tag this is */
	guint len;                     /* length of the data at `start` */
	const guchar *start;           /* beginning of the component data */
};

/*
 * Description of a single image.
 * NOTE(review): presumably one is created per image tag and collected in
 * `struct html_content.images` -- confirm against the parser.
 */
struct html_image {
	guint height;         /* image height; units are not visible here (TODO confirm) */
	guint width;          /* image width; units are not visible here (TODO confirm) */
	guint flags;          /* presumably RSPAMD_HTML_FLAG_IMAGE_* bits -- confirm */
	gchar *src;           /* image source string; ownership not visible here */
	struct html_tag *tag; /* tag associated with this image */
};

/*
 * A colour stored either as four 8-bit components or as one 32-bit value.
 *
 * The union overlays `comp` on `val`. The component order is switched on the
 * host byte order so that, on either endianness, `val` reads as
 * (alpha << 24) | (r << 16) | (g << 8) | b.
 * When BYTE_ORDER is not defined, the little-endian layout is assumed.
 */
struct html_color {
	union {
		struct {
#if !defined(BYTE_ORDER) || BYTE_ORDER == LITTLE_ENDIAN
			/* Little-endian: lowest address holds the least significant byte */
			guint8 b;
			guint8 g;
			guint8 r;
			guint8 alpha;
#else
			/* Big-endian: lowest address holds the most significant byte */
			guint8 alpha;
			guint8 r;
			guint8 g;
			guint8 b;
#endif
		} comp;
		guint32 val; /* packed view of the same four bytes */
	} d;
	/* NOTE(review): presumably TRUE once a colour has been parsed into `d` -- confirm */
	gboolean valid;
};

/*
 * Presentation attributes attached to a tag.
 * NOTE(review): presumably collected in `struct html_content.blocks` --
 * confirm against the parser.
 */
struct html_block {
	struct html_tag *tag;               /* tag associated with this block */
	struct html_color font_color;       /* check `font_color.valid` before use */
	struct html_color background_color; /* check `background_color.valid` before use */
	struct html_tag_component style;    /* presumably the raw `style` attribute -- confirm */
	guint font_size;                    /* units are not visible here (TODO confirm) */
	/* NOTE(review): `class` is a C++ keyword, so this header cannot be included from C++ as is */
	gchar *class;
};

/*
 * A single HTML tag.
 */
struct html_tag {
	gint id;    /* presumably the id accepted by rspamd_html_tag_by_id() -- confirm */
	gint flags; /* tag flags; their values are not defined in this header */
	struct html_tag_component name;
	GQueue *params; /* NOTE(review): presumably a queue of struct html_tag_component -- confirm */
	gpointer extra; /** Additional data associated with tag (e.g. image) */
	GNode *parent; /* parent node; presumably within the html_content.html_tags tree -- confirm */
};

/* Forward declaration */
struct rspamd_task;

/*
 * State for one HTML part; passed to rspamd_html_process_part(),
 * rspamd_html_process_part_full() and rspamd_html_tag_seen().
 */
struct html_content {
	GNode *html_tags;  /* tree of tags; node payload is presumably struct html_tag -- confirm */
	gint flags;        /* presumably RSPAMD_HTML_FLAG_* content bits -- confirm */
	guchar *tags_seen; /* NOTE(review): looks like a per-tag bitmap backing rspamd_html_tag_seen() -- confirm */
	GPtrArray *images; /* presumably an array of struct html_image * -- confirm */
	GPtrArray *blocks; /* presumably an array of struct html_block * -- confirm */
};

/*
 * Decode HTML entities in text. Text is modified in place.
 * @param s text to decode; it is overwritten with the decoded result
 * @param len length of the text at `s`
 * @return NOTE(review): presumably the length of the decoded text -- confirm
 */
guint rspamd_html_decode_entitles_inplace (gchar *s, guint len);

/*
 * Process an HTML part.
 * NOTE(review): presumably a convenience form of
 * rspamd_html_process_part_full() without exceptions/urls/emails -- confirm.
 * @param pool memory pool (presumably used for allocations made while parsing)
 * @param hc HTML content state for this part
 * @param in input bytes of the part
 * @return a byte array; presumably the text extracted from `in` -- confirm
 */
GByteArray* rspamd_html_process_part (rspamd_mempool_t *pool,
		struct html_content *hc,
		GByteArray *in);

/*
 * Process an HTML part, with additional output containers.
 * @param pool memory pool (presumably used for allocations made while parsing)
 * @param hc HTML content state for this part
 * @param in input bytes of the part
 * @param exceptions pointer to a list head; presumably appended to while
 *        parsing -- element type is not visible here (TODO confirm)
 * @param urls hash table; presumably receives URLs found in the part -- confirm
 * @param emails hash table; presumably receives e-mail addresses found in
 *        the part -- confirm
 * @return a byte array; presumably the text extracted from `in` -- confirm
 */
GByteArray* rspamd_html_process_part_full (rspamd_mempool_t *pool,
		struct html_content *hc,
		GByteArray *in, GList **exceptions, GHashTable *urls, GHashTable *emails);

/*
 * Returns true if a specified tag has been seen in a part
 * @param hc HTML content state of the part to query
 * @param tagname name of the tag to look for
 * @return TRUE if the tag has been seen in the part
 */
gboolean rspamd_html_tag_seen (struct html_content *hc, const gchar *tagname);

/**
 * Returns name for the specified tag id
 * @param id tag id (presumably the value stored in `struct html_tag.id` -- confirm)
 * @return name of the tag; behaviour for an unknown id is not visible here
 */
const gchar* rspamd_html_tag_by_id (gint id);

/**
 * Extract URL from HTML tag component and sets component elements if needed
 * @param pool memory pool (presumably used to allocate the result -- confirm)
 * @param start beginning of the URL text
 * @param len length of the text at `start`
 * @param comp tag component that may be updated by the call
 * @return the extracted URL; NOTE(review): presumably NULL when no valid URL
 *         can be extracted -- confirm
 */
struct rspamd_url * rspamd_html_process_url (rspamd_mempool_t *pool,
		const gchar *start, guint len,
		struct html_tag_component *comp);

#endif