summaryrefslogtreecommitdiffstats
path: root/src/libserver/url.h
blob: c9700436b3c9b3805baa68bb6d484042b7ded0e2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
/* URL check functions */
#ifndef URL_H
#define URL_H

#include "config.h"
#include "mem_pool.h"

struct rspamd_task;
struct mime_text_part;

/*
 * Parsed representation of a single URI.
 *
 * NOTE(review): each component pointer below appears to be paired with a
 * same-named *len field; whether components are NUL-terminated or point
 * into @string is not visible in this header - confirm in the parser.
 */
struct uri {
	/* The start of the uri (and thus start of the protocol string). */
	gchar *string;

	/* The internal type of protocol. Can _never_ be PROTOCOL_UNKNOWN. */
	gint protocol; /* enum protocol */

	/* NOTE(review): presumably the address family of an IP-literal host - confirm */
	gint ip_family;

	/* URI components; lengths are kept in the matching *len fields below */
	gchar *user;
	gchar *password;
	gchar *host;
	gchar *port;
	gchar *data;
	gchar *fragment;
	/* NOTE(review): no matching length field exists for @post */
	gchar *post;
	/* NOTE(review): presumably the host part used for SURBL lookups - confirm */
	gchar *surbl;

	/* Another uri linked to this one; NOTE(review): presumably related to
	 * @is_phished below - confirm which side is the displayed/real url */
	struct uri *phished_url;

	/* @protocollen should only be usable if @protocol is either
	 * PROTOCOL_USER or an uri string should be composed. */
	guint protocollen;
	guint userlen;
	guint passwordlen;
	guint hostlen;
	guint portlen;
	guint datalen;
	guint fragmentlen;
	guint surbllen;

	/* Flags */
	gboolean ipv6;  /* URI contains IPv6 host */
	gboolean form;  /* URI originated from form */
	gboolean is_phished; /* URI maybe phishing */
};

/*
 * Result codes describing the outcome of parsing a URI.
 * Values are spelled out explicitly; they match the implicit numbering
 * the enumerators have always had (0 for success, then sequential).
 */
enum uri_errno {
	/* Parsing went well */
	URI_ERRNO_OK = 0,
	/* The URI string was empty */
	URI_ERRNO_EMPTY = 1,
	/* No protocol was found */
	URI_ERRNO_INVALID_PROTOCOL = 2,
	/* Slashes after protocol missing */
	URI_ERRNO_NO_SLASHES = 3,
	/* Too many slashes after protocol */
	URI_ERRNO_TOO_MANY_SLASHES = 4,
	/* '.' after host */
	URI_ERRNO_TRAILING_DOTS = 5,
	/* Host part is missing */
	URI_ERRNO_NO_HOST = 6,
	/* ':' after host without port */
	URI_ERRNO_NO_PORT_COLON = 7,
	/* Slash after host missing */
	URI_ERRNO_NO_HOST_SLASH = 8,
	/* IPv6 security bug detected */
	URI_ERRNO_IPV6_SECURITY = 9,
	/* Port number is bad */
	URI_ERRNO_INVALID_PORT = 10,
	/* Port number is not within 0-65535 */
	URI_ERRNO_INVALID_PORT_RANGE = 11
};

/*
 * Protocol (scheme) identifiers for a uri.
 * Explicit values equal the implicit sequential numbering used so far.
 */
enum protocol {
	PROTOCOL_FILE    = 0,
	PROTOCOL_FTP     = 1,
	PROTOCOL_HTTP    = 2,
	PROTOCOL_HTTPS   = 3,
	PROTOCOL_MAILTO  = 4,
	/* Last entry: none of the protocols listed above */
	PROTOCOL_UNKNOWN = 5
};

/* Shorthand: expands to the `string` member of the object that u points to */
#define struri(u) ((u)->string)

/*
 * Parse urls inside text
 * @param pool memory pool
 * @param task task object
 * @param part current text part
 * @param is_html turn on html heuristic
 *
 * Returns nothing. NOTE(review): where the found urls are stored (task
 * and/or part) is not visible in this header - confirm in the implementation.
 */
void url_parse_text (rspamd_mempool_t *pool,
	struct rspamd_task *task,
	struct mime_text_part *part,
	gboolean is_html);

/*
 * Parse a single url into an uri structure
 * @param uri url object, must be pre allocated
 * @param uristring text form of url
 * @param pool memory pool
 * @return URI_ERRNO_OK if parsing went well, otherwise the enum uri_errno
 * code describing the problem
 *
 * NOTE(review): uristring is non-const; whether the parser modifies it or
 * keeps pointers into it is not visible here - confirm before passing
 * read-only or short-lived buffers.
 */
enum uri_errno parse_uri (struct uri *uri,
	gchar *uristring,
	rspamd_mempool_t *pool);

/*
 * Try to extract url from a text
 * @param pool memory pool
 * @param begin begin of text
 * @param len length of text
 * @param start storage for start position of url found (or NULL)
 * @param end storage for end position of url found (or NULL)
 * @param url_str storage for url string (or NULL)
 * @param is_html NOTE(review): presumably turns on the html heuristic, as
 * for url_parse_text - confirm
 * @return TRUE if url is found in specified text
 */
gboolean url_try_text (rspamd_mempool_t *pool,
	const gchar *begin,
	gsize len,
	gchar **start,
	gchar **end,
	gchar **url_str,
	gboolean is_html);

/*
 * Return text representation of url parsing error
 * @param err error code, one of enum uri_errno
 * @return constant string describing err (must not be modified by the caller)
 */
const gchar * url_strerror (enum uri_errno err);

/*
 * URL unescape characters in the specified string
 * @param s string to unescape; NOTE(review): presumably rewritten in place,
 * since it is non-const and nothing is returned - confirm
 */
void rspamd_url_unescape (gchar *s);

#endif