const gchar *pattern;
const gchar *prefix;
gboolean add_prefix;
+ gchar st;
} url_match_t;
#define URL_FLAG_NOHTML (1 << 0)
url_match_t *match)
{
match->m_begin = pos;
+
+ if (pos > cb->begin - 1) {
+ match->st = *(pos - 1);
+ }
+ else {
+ match->st = '\0';
+ }
+
return TRUE;
}
/* Try to find the start of the url by finding any non-urlsafe character or whitespace/punctuation */
while (p >= cb->begin) {
- if ((!is_domain (*p) && *p != '.' &&
- *p != '/') || g_ascii_isspace (*p)) {
-
+ if (!is_domain (*p) || g_ascii_isspace (*p) || is_url_start (*p)) {
if (!is_url_start (*p) && !g_ascii_isspace (*p)) {
return FALSE;
}
+ match->st = *p;
+
p++;
if (!g_ascii_isalnum (*p)) {
return TRUE;
}
else if (p == cb->begin && p != pos) {
+ match->st = '\0';
match->m_begin = p;
+
return TRUE;
}
else if (*p == '.') {
/* Urls cannot contain '/' in their body */
return FALSE;
}
+
p--;
}
match->m_len = p - match->m_begin;
return TRUE;
}
- else if (*p == '/' || *p == ':') {
+ else if (*p == '/' || *p == ':' || is_url_end (*p)) {
/* Parse arguments, ports by normal way by url default function */
p = match->m_begin;
/* Check common prefix */
return FALSE;
}
+ if (pos > cb->begin) {
+ match->st = *(pos - 1);
+ }
+ else {
+ match->st = '\0';
+ }
+
match->m_begin = pos;
return TRUE;
return FALSE;
}
+ if (last < cb->end && *last == '>') {
+ /* The url ends with '>', so ensure that it also starts with '<' */
+ if (match->st != '<') {
+ return FALSE;
+ }
+ }
+
match->m_len = (last - pos);
return TRUE;
}
}
+ if (pos > cb->begin - 1) {
+ match->st = *(pos - 1);
+ }
+ else {
+ match->st = '\0';
+ }
+
return TRUE;
}
pos = &cb->begin[textpos];
if (pos < cb->end) {
if (!g_ascii_isspace (*pos) && *pos != '/' && *pos != '?' &&
- *pos != ':') {
+ *pos != ':' && !is_url_end (*pos)) {
if (*pos == '.') {
/* We allow . at the end of the domain however */
pos++;
if (pos < cb->end) {
if (!g_ascii_isspace (*pos) && *pos != '/' &&
- *pos != '?' && *pos != ':') {
+ *pos != '?' && *pos != ':' && !is_url_end (*pos)) {
return 0;
}
}