Use another approach to parse emails.

author: Vsevolod Stakhov <vsevolod@highsecure.ru> 2015-09-07 21:59:32 +0100
committer: Vsevolod Stakhov <vsevolod@highsecure.ru> 2015-09-07 21:59:32 +0100
commit: 27cd26c8d57e29304d4e48fe739fa02a5922a77b (patch)
tree: 341211b63660e43adbb3e81e905b9cb5d76f4bd5 /src/libserver
parent: 469ef22eb5ea6573c6e74083a91bee2afab528a4 (diff)
download: rspamd-27cd26c8d57e29304d4e48fe739fa02a5922a77b.tar.gz rspamd-27cd26c8d57e29304d4e48fe739fa02a5922a77b.zip
1 files changed, 28 insertions, 53 deletions
diff --git a/src/libserver/url.c b/src/libserver/url.c
index 97af78cea..1342ae92c 100644
--- a/src/libserver/url.c
+++ b/src/libserver/url.c
@@ -24,6 +24,7 @@
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+#include <http_parser.h>
 #include "config.h"
 #include "url.h"
 #include "util.h"
@@ -334,7 +335,7 @@ rspamd_url_init (const gchar *tld_file)
 
 static gint
 rspamd_mailto_parse (struct http_parser_url *u, const gchar *str, gsize len,
-		gchar const **end)
+		gchar const **end, gboolean strict)
 {
 	const gchar *p = str, *c = str, *last = str + len;
 	gchar t;
@@ -475,6 +476,10 @@ rspamd_mailto_parse (struct http_parser_url *u, const gchar *str, gsize len,
 		*end = p;
 	}
 
+	if (!strict) {
+		return 1;
+	}
+
 	return ret;
 }
 
@@ -1126,7 +1131,7 @@ rspamd_url_parse (struct rspamd_url *uri, gchar *uristring, gsize len,
 	if (len > sizeof ("mailto:") - 1) {
 		/* For mailto: urls we also need to add slashes to make it a valid URL */
 		if (g_ascii_strncasecmp (p, "mailto:", sizeof ("mailto:") - 1) == 0) {
-			ret = rspamd_mailto_parse (&u, uristring, len, &end);
+			ret = rspamd_mailto_parse (&u, uristring, len, &end, TRUE);
 		}
 		else {
 			ret = rspamd_web_parse (&u, uristring, len, &end, TRUE);
@@ -1443,35 +1448,13 @@ url_email_start (struct url_callback_data *cb,
 		const gchar *pos,
 		url_match_t *match)
 {
-	const gchar *p;
-	/* Check what we have found */
-	if (pos > cb->begin && *pos == '@') {
-		/* Try to extract it with username */
-		p = pos - 1;
-		while (p > cb->begin && is_urlsafe (*p) && *p != ':') {
-			p--;
-		}
+	if (!match->prefix || match->prefix[0] == '\0') {
+		/* We have mailto:// at the beginning */
+		match->m_begin = pos;
 
-		/*
-		 * If we've found something special but not ':' then we can try this as
-		 * email address
-		 */
-		if (!is_urlsafe (*p) && p != pos - 1 && *p != ':') {
-			match->m_begin = p + 1;
-			return TRUE;
-		}
-		else if (p == cb->begin) {
-			match->m_begin = p;
-			return TRUE;
-		}
-	}
-	else {
-		p = pos + strlen (match->pattern);
-		if (is_atom (*p)) {
-			match->m_begin = pos;
-			return TRUE;
-		}
+		return TRUE;
 	}
+
 	return FALSE;
 }
 
@@ -1480,37 +1463,29 @@ url_email_end (struct url_callback_data *cb,
 		const gchar *pos,
 		url_match_t *match)
 {
-	const gchar *p;
-	gboolean got_at = FALSE;
-
-	p = pos + strlen (match->pattern);
-	if (*pos == '@') {
-		got_at = TRUE;
-	}
+	const gchar *last = NULL;
+	struct http_parser_url u;
 
-	while (p < cb->end && (is_domain (*p) || *p == '_'
-					   || (*p == '@' && !got_at) ||
-					   *p == '.')) {
+	if (!match->prefix || match->prefix[0] == '\0') {
+		/* We have mailto:// at the beginning */
+		if (rspamd_mailto_parse (&u, pos, cb->end - pos, &last, FALSE) != 0) {
+			return FALSE;
+		}
 
-		if (*p == '@') {
-			got_at = TRUE;
+		if (!(u.field_set & (1 << UF_USERINFO))) {
+			return FALSE;
 		}
 
-		p++;
-	}
+		cb->last_at = match->m_begin + u.field_data[UF_USERINFO].off +
+				u.field_data[UF_USERINFO].len;
 
-	/* Strip strange symbols at the end */
-	if (got_at && p < cb->end) {
-		while (p >= match->m_begin &&
-			   (!is_domain (*p) || *p == '.' || *p == '_')) {
-			p--;
-		}
-		p++;
-	}
+		g_assert (*cb->last_at == '@');
+		match->m_len = (last - pos);
 
-	match->m_len = p - match->m_begin;
+		return TRUE;
+	}
 
-	return got_at;
+	return FALSE;
 }
 
 void
author	Vsevolod Stakhov <vsevolod@highsecure.ru>	2015-09-07 21:59:32 +0100
committer	Vsevolod Stakhov <vsevolod@highsecure.ru>	2015-09-07 21:59:32 +0100
commit	27cd26c8d57e29304d4e48fe739fa02a5922a77b (patch)
tree	341211b63660e43adbb3e81e905b9cb5d76f4bd5 /src/libserver
parent	469ef22eb5ea6573c6e74083a91bee2afab528a4 (diff)
download	rspamd-27cd26c8d57e29304d4e48fe739fa02a5922a77b.tar.gz rspamd-27cd26c8d57e29304d4e48fe739fa02a5922a77b.zip