url_match_t *match)
{
const gchar *p = pos;
+ guint processed = 0;
+ static const guint max_shift = 253 + sizeof ("https://");
/* Try to find the start of the url by finding any non-urlsafe character or whitespace/punctuation */
while (p >= cb->begin) {
}
p--;
+ processed ++;
+
+ if (processed > max_shift) {
+ /* Too long */
+ return FALSE;
+ }
}
return FALSE;
url_match_t *match)
{
/* Check what we have found */
- if (pos > cb->begin &&
- (g_ascii_strncasecmp (pos, "www", 3) == 0 ||
- g_ascii_strncasecmp (pos, "ftp", 3) == 0)) {
-
- if (!(is_url_start (*(pos - 1)) ||
- g_ascii_isspace (*(pos - 1)) ||
- pos - 1 == match->prev_newline_pos ||
- (*(pos - 1) & 0x80))) { /* Chinese trick */
- return FALSE;
+ if (pos > cb->begin) {
+ if (g_ascii_strncasecmp (pos, "www", 3) == 0 ||
+ g_ascii_strncasecmp (pos, "ftp", 3) == 0) {
+
+ if (!(is_url_start (*(pos - 1)) ||
+ g_ascii_isspace (*(pos - 1)) ||
+ pos - 1 == match->prev_newline_pos ||
+ (*(pos - 1) & 0x80))) { /* Chinese trick */
+ return FALSE;
+ }
+ }
+ else {
+ guchar prev = *(pos - 1);
+
+ if (g_ascii_isalnum (prev)) {
+ /* Part of another url */
+ return FALSE;
+ }
}
}
}
cb->start = m.m_begin;
- cb->fin = m.m_begin + m.m_len;
+ cb->fin = pos;
return 1;
}
}
cb->start = m.m_begin;
- cb->fin = m.m_begin + m.m_len;
+ cb->fin = pos;
url = rspamd_mempool_alloc0 (pool, sizeof (struct rspamd_url));
g_strstrip (cb->url_str);
rc = rspamd_url_parse (url, cb->url_str,
}
if (cb->func) {
- cb->func (url, cb->start - text, cb->fin - text, cb->funcd);
+ cb->func (url, cb->start - text, (m.m_begin + m.m_len) - text,
+ cb->funcd);
}
}
else if (rc != URI_ERRNO_OK) {