{
struct uri *new;
gchar *url_str;
- const gchar *p;
+ const gchar *p, *c;
gsize len = 0;
gint off, rc;
p = url_text;
- while (len < remain) {
+ while (len < remain - 1) {
if (*p == '<' || *p == '>') {
break;
}
if (rc == URI_ERRNO_OK || rc == URI_ERRNO_NO_SLASHES || rc == URI_ERRNO_NO_HOST_SLASH) {
if (g_ascii_strncasecmp (href_url->host, new->host,
MAX (href_url->hostlen, new->hostlen)) != 0) {
- href_url->is_phished = TRUE;
- href_url->phished_url = new;
+ /* Special check for urls beginning with 'www' */
+ if (new->hostlen > 4 && href_url->hostlen > 4) {
+ p = new->host;
+ c = NULL;
+ if ((p[0] == 'w' || p[0] == 'W') &&
+ (p[1] == 'w' || p[1] == 'W') &&
+ (p[2] == 'w' || p[2] == 'W') &&
+ (p[3] == '.')) {
+ p += 4;
+ c = href_url->host;
+ len = MAX (href_url->hostlen, new->hostlen - 4);
+ }
+ else {
+ p = href_url->host;
+ if ((p[0] == 'w' || p[0] == 'W') &&
+ (p[1] == 'w' || p[1] == 'W') &&
+ (p[2] == 'w' || p[2] == 'W') &&
+ (p[3] == '.')) {
+ p += 4;
+ c = new->host;
+ len = MAX (href_url->hostlen - 4, new->hostlen);
+ }
+ }
+ /* Compare parts and check for phished hostname */
+ if (c != NULL && g_ascii_strncasecmp (p, c, len) != 0) {
+ href_url->is_phished = TRUE;
+ href_url->phished_url = new;
+ }
+ }
+ else {
+ href_url->is_phished = TRUE;
+ href_url->phished_url = new;
+ }
}
}
else {
* Check for phishing
*/
if ((p = strchr (c, '>')) != NULL ) {
- check_phishing (task, url, p + 1, remain - (p - tag_text));
+ p ++;
+ check_phishing (task, url, p, remain - (p - tag_text));
}
if (part->html_urls && g_tree_lookup (part->html_urls, url_text) == NULL) {
g_tree_insert (part->html_urls, url_text, url);
}
}
- while (p < end - 1 && *p != stop && is_urlsafe (*p)) {
+ while (p < end && *p != stop && is_urlsafe (*p)) {
p ++;
}
if (is_atom (*p)) {
/* might be a domain or user@domain */
c = p;
- while (p < end - 1) {
+ while (p < end) {
if (!is_atom (*p)) {
break;
}
p++;
- while (p < end - 1 && is_atom (*p)) {
+ while (p < end && is_atom (*p)) {
p++;
}
}
else if (is_domain (*p)) {
domain:
- while (p < end - 1) {
+ while (p < end) {
if (!is_domain (*p)) {
break;
}
p++;
- while (p < end - 1 && is_domain (*p)) {
+ while (p < end && is_domain (*p)) {
p++;
}
- if ((p + 1) < end - 1 && *p == '.' && (is_domain (*(p + 1)) || *(p + 1) == '/')) {
+ if ((p + 1) < end && *p == '.' && (is_domain (*(p + 1)) || *(p + 1) == '/')) {
p++;
}
}
if (is_digit (*p) || passwd) {
port = (*p++ - '0');
- while (p < end - 1 && is_digit (*p) && port < 65536) {
+ while (p < end && is_digit (*p) && port < 65536) {
port = (port * 10) + (*p++ - '0');
}
passwd = TRUE;
c = p;
- while (p < end - 1 && is_atom (*p)) {
+ while (p < end && is_atom (*p)) {
p++;
}
case '/': /* we've detected a path component to our url */
p++;
case '?':
- while (p < end - 1 && is_urlsafe (*p)) {
+ while (p < end && is_urlsafe (*p)) {
if (*p == open_brace) {
brace_stack++;
}