diff options
author | Vsevolod Stakhov <vsevolod@rambler-co.ru> | 2009-07-20 19:57:24 +0400 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@rambler-co.ru> | 2009-07-20 19:57:24 +0400 |
commit | 3499fefcd2a33d80d5f9400a161106a2bd1ff72f (patch) | |
tree | b0384dedb76e3b64cd8ea9ec5cb72477c184110e /src/html.c | |
parent | f86068d197719b8758fc0a2aeb8556526b2331f8 (diff) | |
download | rspamd-3499fefcd2a33d80d5f9400a161106a2bd1ff72f.tar.gz rspamd-3499fefcd2a33d80d5f9400a161106a2bd1ff72f.zip |
* Decode HTML entities in URLs while extracting URL values from HTML tags
NOTE: works only for ASCII symbols
Diffstat (limited to 'src/html.c')
-rw-r--r-- | src/html.c | 46 |
1 files changed, 46 insertions, 0 deletions
/*
 * Decode decimal numeric HTML entities ("&#NN;") in text, in place.
 *
 * s must be a writable NUL-terminated string. The decoded text is never
 * longer than the input, so the result is written over the same buffer and
 * re-terminated with '\0'.
 *
 * Rules:
 *  - "&#" DIGITS ";" with a code in 1..127 is replaced by that ASCII char;
 *  - any other "&#...;" sequence (no digits, non-digit characters, code 0 or
 *    code above 127) is logged and replaced by a single 'U';
 *  - "&#" that is never closed by ';' is left untouched together with the
 *    rest of the string;
 *  - everything else is copied as is.
 *
 * Only ASCII codes are supported; named ("&amp;") and hexadecimal ("&#x41;")
 * entities are not decoded.
 */
static void
decode_entitles (char *s)
{
	enum {
		ENTITY_CODE_MIN = 1,		/* 0 would terminate the string early */
		ENTITY_CODE_MAX = 127,		/* last ASCII code */
		ENTITY_PARSE_CAP = 10000000	/* stop accumulating before int can overflow */
	};
	char *t = s;			/* t - tortoise: write position */
	char *h = s;			/* h - hare: read position */
	char *digits, *digits_end, *semi;
	int val;

	while (*h) {
		if (*h != '&' || *(h + 1) != '#') {
			/* Out of entity: plain copy */
			*t = *h;
			t ++;
			h ++;
			continue;
		}

		/*
		 * Parse the code from the READ position: the write position lags
		 * behind as soon as one entity has been decoded and must not be
		 * used as the source of digits.
		 */
		digits = h + 2;
		val = 0;
		for (digits_end = digits; *digits_end >= '0' && *digits_end <= '9'; digits_end ++) {
			if (val < ENTITY_PARSE_CAP) {
				val = val * 10 + (*digits_end - '0');
			}
		}

		/* Look for the terminating ';' */
		semi = digits_end;
		while (*semi != '\0' && *semi != ';') {
			semi ++;
		}

		if (*semi == '\0') {
			/* No terminator till the end of string: keep the tail verbatim */
			while (*h) {
				*t = *h;
				t ++;
				h ++;
			}
			break;
		}

		if (digits_end == digits || digits_end != semi ||
				val < ENTITY_CODE_MIN || val > ENTITY_CODE_MAX) {
			msg_info ("decode_entitles: invalid entitle code, cannot convert, %d", val);
			*t = 'U';
		}
		else {
			*t = (char)val;
		}
		t ++;
		/* Resume right after the ';' */
		h = semi + 1;
	}
	*t = '\0';
}