if (!rspamd_url_is_subdomain(disp_tok, href_tok)) {
href_url->flags |= RSPAMD_URL_FLAG_PHISHED;
- href_url->linked_url = text_url;
text_url->flags |= RSPAMD_URL_FLAG_HTML_DISPLAYED;
+
+ if (href_url->ext == nullptr) {
+ href_url->ext = rspamd_mempool_alloc0_type(pool, rspamd_url_ext);
+ }
+ href_url->ext->linked_url = text_url;
}
}
}
return;
}
- url->visible_part = rspamd_mempool_alloc_buffer(pool, visible_part.size() + 1);
- rspamd_strlcpy(url->visible_part,
+ if (url->ext == nullptr) {
+ url->ext = rspamd_mempool_alloc0_type(pool, rspamd_url_ext);
+ }
+ url->ext->visible_part = rspamd_mempool_alloc_buffer(pool, visible_part.size() + 1);
+ rspamd_strlcpy(url->ext->visible_part,
visible_part.data(),
visible_part.size() + 1);
dlen = visible_part.size();
/* Strip unicode spaces from the start and the end */
- url->visible_part = const_cast<char *>(
- rspamd_string_unicode_trim_inplace(url->visible_part,
+ url->ext->visible_part = const_cast<char *>(
+ rspamd_string_unicode_trim_inplace(url->ext->visible_part,
&dlen));
auto maybe_url = html_url_is_phished(pool, url,
- {url->visible_part, dlen});
+ {url->ext->visible_part, dlen});
if (maybe_url) {
url->flags |= saved_flags;
}
}
- rspamd_normalise_unicode_inplace(url->visible_part, &dlen);
+ rspamd_normalise_unicode_inplace(url->ext->visible_part, &dlen);
}
auto
ucl_object_insert_key (obj, flags, "flags", 0, false);
- if (url->linked_url) {
- encoded = rspamd_url_encode (url->linked_url, &enclen, task->task_pool);
- elt = rspamd_protocol_extended_url (task, url->linked_url, encoded,
+ if (url->ext && url->ext->linked_url) {
+ encoded = rspamd_url_encode (url->ext->linked_url, &enclen, task->task_pool);
+ elt = rspamd_protocol_extended_url (task, url->ext->linked_url, encoded,
enclen);
ucl_object_insert_key (obj, elt, "linked_url", 0, false);
}
uri->flags |= RSPAMD_URL_FLAG_NUMERIC;
/* Reconstruct URL */
- if (uri->flags & RSPAMD_URL_FLAG_HAS_PORT) {
+ if (uri->flags & RSPAMD_URL_FLAG_HAS_PORT && uri->ext) {
p = strbuf + r;
start_offset = p + 1;
r += rspamd_snprintf (strbuf + r, slen - r, ":%ud",
- (unsigned int)uri->port);
+ (unsigned int)uri->ext->port);
}
if (uri->datalen > 0) {
p = strbuf + r;
}
}
- uri->port = u.port;
uri->flags = flags;
if (!uri->hostlen) {
struct rspamd_url_tag *prev, *next;
};
-
+struct rspamd_url_ext;
+/**
+ * URL structure
+ *
+ * Parts of the url are addressed as offsets into `string`: the user part is
+ * `string + usershift` and is present only when `userlen > 0`
+ * (see rspamd_url_user). NOTE(review): the remaining shift/len pairs
+ * presumably follow the same scheme - confirm against their accessors.
+ * The port is not stored here; it lives in the optional `ext` part.
+ */
struct rspamd_url {
- gchar *string;
- gchar *raw;
+ char *string;
+ char *raw;
+ struct rspamd_url_ext *ext; /* rarely used fields; NULL until allocated on demand */
- gchar *visible_part;
- struct rspamd_url *linked_url;
+ uint32_t flags; /* bitmask of RSPAMD_URL_FLAG_* values */
- guint32 flags;
+ uint8_t protocol; /* compared against protocol constants, e.g. PROTOCOL_HTTPS */
+ uint8_t protocollen;
- guint8 protocol;
- guint8 protocollen;
-
- guint16 port;
+ uint16_t hostshift;
+ uint16_t datashift;
+ uint16_t queryshift;
+ uint16_t fragmentshift;
+ uint16_t tldshift;
guint16 usershift;
- guint16 hostshift;
- guint16 datashift;
- guint16 queryshift;
- guint16 fragmentshift;
- guint16 tldshift;
guint16 userlen;
- guint16 hostlen;
- guint16 datalen;
- guint16 querylen;
- guint16 fragmentlen;
- guint16 tldlen;
- guint16 count;
- guint16 urllen;
- guint16 rawlen;
+
+ uint16_t hostlen;
+ uint16_t datalen;
+ uint16_t querylen;
+ uint16_t fragmentlen;
+ uint16_t tldlen;
+ uint16_t count;
+ uint16_t urllen;
+ uint16_t rawlen;
+};
+
+/**
+ * Rarely used url fields
+ *
+ * Reached through the `ext` pointer of struct rspamd_url; users of that
+ * pointer check it for NULL and allocate this structure zero-filled from a
+ * memory pool before writing any field.
+ */
+struct rspamd_url_ext {
+ gchar *visible_part; /* copy of the url's visible text with surrounding unicode spaces stripped */
+ struct rspamd_url *linked_url; /* set together with RSPAMD_URL_FLAG_PHISHED (the displayed text url) or RSPAMD_URL_FLAG_REDIRECTED */
+
+ guint16 port; /* explicit port; only read when RSPAMD_URL_FLAG_HAS_PORT is set */
};
#define rspamd_url_user(u) ((u)->userlen > 0 ? (u)->string + (u)->usershift : NULL)
*/
int rspamd_url_cmp_qsort(const void *u1, const void *u2);
+/**
+ * Returns the port to use for a url.
+ *
+ * If the url carries an explicit port (RSPAMD_URL_FLAG_HAS_PORT is set and the
+ * extension structure is allocated) that port is returned. Otherwise a
+ * standard port is assumed: 443 for PROTOCOL_HTTPS and 80 for any other
+ * protocol.
+ *
+ * The url is only read, hence the const-qualified parameter.
+ *
+ * @param u url to inspect, must not be NULL
+ * @return explicit port if present, assumed standard port otherwise
+ */
+static inline uint16_t rspamd_url_get_port(const struct rspamd_url *u)
+{
+	/* The flag alone is not enough: the port itself is stored in the ext part */
+	if ((u->flags & RSPAMD_URL_FLAG_HAS_PORT) && u->ext != NULL) {
+		return u->ext->port;
+	}
+
+	/* No explicit port stored: assume the standard one */
+	return u->protocol == PROTOCOL_HTTPS ? 443 : 80;
+}
+
/**
* Normalize unicode input and set out url flags as appropriate
* @param pool
struct rspamd_lua_url *url = lua_check_url (L, 1);
if (url != NULL) {
- lua_pushinteger (L, url->url->port);
+ lua_pushinteger (L, rspamd_url_get_port(url->url));
}
else {
lua_pushnil (L);
struct rspamd_lua_url *purl, *url = lua_check_url (L, 1);
if (url) {
- if (url->url->linked_url != NULL) {
+ if (url->url->ext && url->url->ext->linked_url != NULL) {
+ /* XXX: in fact, this is the only possible combination of flags, so this check is redundant */
if (url->url->flags &
(RSPAMD_URL_FLAG_PHISHED|RSPAMD_URL_FLAG_REDIRECTED)) {
purl = lua_newuserdata (L, sizeof (struct rspamd_lua_url));
rspamd_lua_setclass (L, "rspamd{url}", -1);
- purl->url = url->url->linked_url;
+ purl->url = url->url->ext->linked_url;
return 1;
}
redir = lua_check_url (L, -1);
url->url->flags |= RSPAMD_URL_FLAG_REDIRECTED;
- url->url->linked_url = redir->url;
+
+ if (url->url->ext == NULL) {
+ url->url->ext = rspamd_mempool_alloc0_type(pool, struct rspamd_url_ext);
+ }
+ url->url->ext->linked_url = redir->url;
}
}
else {
}
url->url->flags |= RSPAMD_URL_FLAG_REDIRECTED;
- url->url->linked_url = redir->url;
+ if (url->url->ext == NULL) {
+ url->url->ext = rspamd_mempool_alloc0_type(pool, struct rspamd_url_ext);
+ }
+ url->url->ext->linked_url = redir->url;
/* Push back on stack */
lua_pushvalue (L, 2);
LUA_TRACE_POINT;
struct rspamd_lua_url *url = lua_check_url (L, 1);
- if (url != NULL && url->url->visible_part) {
- lua_pushstring (L, url->url->visible_part);
+ if (url != NULL && url->url->ext && url->url->ext->visible_part) {
+ lua_pushstring (L, url->url->ext->visible_part);
}
else {
lua_pushnil (L);
lua_settable (L, -3);
}
- if (u->port != 0) {
- lua_pushstring (L, "port");
- lua_pushinteger (L, u->port);
- lua_settable (L, -3);
- }
+ lua_pushstring (L, "port");
+ lua_pushinteger (L, rspamd_url_get_port(u));
+ lua_settable (L, -3);
if (u->tldlen > 0) {
lua_pushstring (L, "tld");