summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- src/libmime/message.c 6
-rw-r--r-- src/libserver/html.c 34
-rw-r--r-- src/libserver/html.h 4
3 files changed, 39 insertions, 5 deletions
diff --git a/src/libmime/message.c b/src/libmime/message.c
index 6c1bad06c..cdd532f5a 100644
--- a/src/libmime/message.c
+++ b/src/libmime/message.c
@@ -1137,10 +1137,12 @@ process_text_part (struct rspamd_task *task,
text_part->mime_part = mime_part;
text_part->flags |= RSPAMD_MIME_PART_FLAG_BALANCED;
- text_part->content = rspamd_html_process_part (
+ text_part->content = rspamd_html_process_part_full (
task->task_pool,
text_part->html,
- part_content);
+ part_content,
+ &text_part->urls_offset,
+ task->urls);
rspamd_url_text_extract (task->task_pool, task, text_part, TRUE);
diff --git a/src/libserver/html.c b/src/libserver/html.c
index c23b228b6..f8220eabd 100644
--- a/src/libserver/html.c
+++ b/src/libserver/html.c
@@ -1382,8 +1382,8 @@ rspamd_html_parse_tag_content (rspamd_mempool_t *pool,
}
GByteArray*
-rspamd_html_process_part (rspamd_mempool_t *pool, struct html_content *hc,
- GByteArray *in)
+rspamd_html_process_part_full (rspamd_mempool_t *pool, struct html_content *hc,
+ GByteArray *in, GList **exceptions, GHashTable *urls)
{
const guchar *p, *c, *end, *tag_start = NULL, *savep = NULL;
guchar t;
@@ -1391,8 +1391,9 @@ rspamd_html_process_part (rspamd_mempool_t *pool, struct html_content *hc,
GByteArray *dest;
guint obrace = 0, ebrace = 0;
GNode *cur_level = NULL;
- gint substate, len;
+ gint substate, len, href_offset = -1;
struct html_tag *cur_tag = NULL;
+ struct process_exception *ex;
enum {
parse_start = 0,
tag_begin,
@@ -1696,6 +1697,25 @@ rspamd_html_process_part (rspamd_mempool_t *pool, struct html_content *hc,
g_byte_array_append (dest, "\r\n", 2);
save_space = FALSE;
}
+
+ if (cur_tag->id == Tag_A) {
+ if (!(cur_tag->flags & (FL_CLOSED|FL_CLOSING))) {
+ href_offset = dest->len;
+ }
+ else if (cur_tag->flags & FL_CLOSING) {
+ /* Insert exception */
+ if (exceptions && href_offset != -1
+ && (gint)dest->len > href_offset) {
+ ex = rspamd_mempool_alloc (pool, sizeof (*ex));
+ ex->pos = href_offset;
+ ex->len = dest->len - href_offset;
+
+ *exceptions = g_list_prepend (*exceptions, ex);
+ }
+
+ href_offset = -1;
+ }
+ }
}
else {
state = content_write;
@@ -1711,3 +1731,11 @@ rspamd_html_process_part (rspamd_mempool_t *pool, struct html_content *hc,
return dest;
}
+
+GByteArray*
+rspamd_html_process_part (rspamd_mempool_t *pool,
+ struct html_content *hc,
+ GByteArray *in)
+{
+ return rspamd_html_process_part_full (pool, hc, in, NULL, NULL);
+}
diff --git a/src/libserver/html.h b/src/libserver/html.h
index 83c58c9f1..1a98a3e9a 100644
--- a/src/libserver/html.h
+++ b/src/libserver/html.h
@@ -57,4 +57,8 @@ GByteArray* rspamd_html_process_part (rspamd_mempool_t *pool,
struct html_content *hc,
GByteArray *in);
+GByteArray* rspamd_html_process_part_full (rspamd_mempool_t *pool,
+ struct html_content *hc,
+ GByteArray *in, GList **exceptions, GHashTable *urls);
+
#endif