aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@rambler-co.ru> 2009-09-16 15:53:18 +0400
committer Vsevolod Stakhov <vsevolod@rambler-co.ru> 2009-09-16 15:53:18 +0400
commit c8dd7855398d175e8482e7bd5cfb499e7b1b3258 (patch)
tree 8631881ca0287878987dec7fe3edbdbc041316ce
parent f3b338f1583c90a16fcd65e843bc875f4309a2b4 (diff)
download rspamd-c8dd7855398d175e8482e7bd5cfb499e7b1b3258.tar.gz
rspamd-c8dd7855398d175e8482e7bd5cfb499e7b1b3258.zip
* Decode all HTML entities in HTML parts
-rw-r--r-- src/html.c 21
-rw-r--r-- src/html.h 1
-rw-r--r-- src/message.c 1
3 files changed, 18 insertions, 5 deletions
diff --git a/src/html.c b/src/html.c
index 5560f920a..1381fdd4f 100644
--- a/src/html.c
+++ b/src/html.c
@@ -260,16 +260,24 @@ get_tag_by_name (const char *name)
}
/* Decode HTML entitles in text */
-static void
-decode_entitles (char *s)
+void
+decode_entitles (char *s, guint *len)
{
+ guint l;
char *t = s; /* t - tortoise */
char *h = s; /* h - hare */
char *e = s;
char *end_ptr;
int state = 0, val, base;
+
+ if (len == NULL || *len == 0) {
+ l = strlen (s);
+ }
+ else {
+ l = *len;
+ }
- while (*h) {
+ while (h - s < l) {
switch (state) {
/* Out of entitle */
case 0:
@@ -319,7 +327,10 @@ decode_entitles (char *s)
}
}
*t = '\0';
-
+
+ if (len != NULL) {
+ *len = t - s;
+ }
}
static void
@@ -394,7 +405,7 @@ parse_tag_url (struct worker_task *task, struct mime_text_part *part, tag_id_t i
url_text = memory_pool_alloc (task->task_pool, len + 1);
g_strlcpy (url_text, c, len + 1);
- decode_entitles (url_text);
+ decode_entitles (url_text, NULL);
if (g_ascii_strncasecmp (url_text, "http://", sizeof ("http://") - 1) != 0) {
return;
diff --git a/src/html.h b/src/html.h
index 1a7924e08..e81a9d11c 100644
--- a/src/html.h
+++ b/src/html.h
@@ -209,5 +209,6 @@ struct worker_task;
gboolean add_html_node (struct worker_task *task, memory_pool_t *pool, struct mime_text_part *part, char *tag_text, GNode **cur_level);
struct html_tag * get_tag_by_name (const char *name);
+void decode_entitles (char *s, guint *len);
#endif
diff --git a/src/message.c b/src/message.c
index c84aaf6bb..1416f22ab 100644
--- a/src/message.c
+++ b/src/message.c
@@ -533,6 +533,7 @@ process_text_part (struct worker_task *task, GByteArray *part_content, GMimeCont
url_parse_text (task->task_pool, task, text_part, FALSE);
}
else {
+ decode_entitles (text_part->content->data, &text_part->content->len);
url_parse_text (task->task_pool, task, text_part, FALSE);
#if 0
url_parse_text (task->task_pool, task, text_part, TRUE);