http_parser_parse_url (msg->url->str, msg->url->len, TRUE, &u);
if (u.field_set & (1 << UF_PATH)) {
- guint unnorm_len;
+ gsize unnorm_len;
lookup.begin = msg->url->str + u.field_data[UF_PATH].off;
lookup.len = u.field_data[UF_PATH].len;
http_parser_parse_url (msg->url->str, msg->url->len, TRUE, &u);
if (u.field_set & (1 << UF_PATH)) {
- guint unnorm_len;
+ gsize unnorm_len;
lookup.begin = msg->url->str + u.field_data[UF_PATH].off;
lookup.len = u.field_data[UF_PATH].len;
gsize decoded_len;
const gchar *p, *s, *prefix = "http://";
gchar *d;
- guint i, dlen;
+ guint i;
+ gsize dlen;
gboolean has_bad_chars = FALSE, no_prefix = FALSE;
static const gchar hexdigests[16] = "0123456789abcdef";
struct rspamd_url *turl;
gboolean url_found = FALSE;
struct rspamd_process_exception *ex;
+ enum rspamd_normalise_result norm_res;
+ guint saved_flags = 0;
+ gsize dlen;
- if (href_offset <= 0) {
+ if (href_offset < 0) {
/* No displayed url, just some text within <a> tag */
return;
}
url->visible_part = rspamd_mempool_alloc (pool, dest->len - href_offset + 1);
rspamd_strlcpy (url->visible_part, dest->data + href_offset,
dest->len - href_offset + 1);
- g_strstrip (url->visible_part);
+ dlen = dest->len - href_offset;
+ url->visible_part =
+ (gchar *)rspamd_string_len_strip (url->visible_part, &dlen, " \t\v\r\n");
+
+ norm_res = rspamd_normalise_unicode_inplace (pool, url->visible_part, &dlen);
+
+ if (norm_res & RSPAMD_UNICODE_NORM_UNNORMAL) {
+ saved_flags |= RSPAMD_URL_FLAG_UNNORMALISED;
+ }
rspamd_html_url_is_phished (pool, url,
- dest->data + href_offset,
- dest->len - href_offset,
+ url->visible_part,
+ dlen,
&url_found, &displayed_url);
if (url_found) {
- url->flags |= RSPAMD_URL_FLAG_DISPLAY_URL;
+ url->flags |= saved_flags|RSPAMD_URL_FLAG_DISPLAY_URL;
}
if (exceptions && url_found) {
http_parser_parse_url (msg->url->str, msg->url->len, TRUE, &u);
if (u.field_set & (1 << UF_PATH)) {
- guint unnorm_len;
+ gsize unnorm_len;
pathbuf = g_malloc (u.field_data[UF_PATH].len);
memcpy (pathbuf, msg->url->str + u.field_data[UF_PATH].off,
}
void
-rspamd_http_normalize_path_inplace (gchar *path, guint len, guint *nlen)
+rspamd_http_normalize_path_inplace (gchar *path, guint len, gsize *nlen)
{
const gchar *p, *end, *slash = NULL, *dot = NULL;
gchar *o;
* @param len
* @param nlen
*/
-void rspamd_http_normalize_path_inplace (gchar *path, guint len, guint *nlen);
+void rspamd_http_normalize_path_inplace (gchar *path, guint len, gsize *nlen);
#ifdef __cplusplus
}
gchar *p;
const gchar *end;
guint i, complen, ret, flags = 0;
- guint unquoted_len = 0;
+ gsize unquoted_len = 0;
memset (uri, 0, sizeof (*uri));
memset (&u, 0, sizeof (u));
enum rspamd_normalise_result
rspamd_normalise_unicode_inplace (rspamd_mempool_t *pool, gchar *start,
- guint *len)
+ gsize *len)
{
#if U_ICU_VERSION_MAJOR_NUM >= 44
UErrorCode uc_err = U_ZERO_ERROR;
* @return flags from enum rspamd_normalise_result describing the normalisation outcome
*/
enum rspamd_normalise_result rspamd_normalise_unicode_inplace (rspamd_mempool_t *pool,
- gchar *start, guint *len);
+ gchar *start, gsize *len);
enum rspamd_regexp_escape_flags {
RSPAMD_REGEXP_ESCAPE_ASCII = 0,