/* URL check functions */
#ifndef URL_H
#define URL_H
#include "config.h"
#include "mem_pool.h"
struct rspamd_task;
struct mime_text_part;
/*
 * Parsed representation of a single URI.
 *
 * The string components below each have a same-named guint length field
 * (user/userlen, host/hostlen, ...), with the exception of @post, for which
 * no length field is declared.
 * NOTE(review): whether the component pointers point into @string or own
 * separate copies, and whether they are NUL-terminated, is not visible from
 * this header - confirm against the parser before relying on either.
 */
struct uri {
/* The start of the uri (and thus start of the protocol string). */
gchar *string;
/* The internal type of protocol. Can _never_ be PROTOCOL_UNKNOWN. */
gint protocol; /* enum protocol */
/* NOTE(review): presumably the address family of an IP-literal host - confirm. */
gint ip_family;
/* User name component; length in @userlen. */
gchar *user;
/* Password component; length in @passwordlen. */
gchar *password;
/* Host component; length in @hostlen. */
gchar *host;
/* Port component kept as text; length in @portlen. */
gchar *port;
/* NOTE(review): presumably the part following host/port (path and query) - confirm. Length in @datalen. */
gchar *data;
/* Fragment component; length in @fragmentlen. */
gchar *fragment;
/* NOTE(review): meaning not visible here; no matching length field exists. */
gchar *post;
/* NOTE(review): presumably the domain portion used for SURBL lookups - confirm. Length in @surbllen. */
gchar *surbl;
/* Another uri linked to this one; see @is_phished. NOTE(review): which side
 * of the pair is the displayed vs. real target is not visible here. */
struct uri *phished_url;
/* @protocollen should only be usable if @protocol is either
 * PROTOCOL_USER or an uri string should be composed.
 * NOTE(review): PROTOCOL_USER is not declared in enum protocol in this
 * header - this remark may be stale. */
guint protocollen;
guint userlen;
guint passwordlen;
guint hostlen;
guint portlen;
guint datalen;
guint fragmentlen;
guint surbllen;
/* Flags */
gboolean ipv6; /* URI contains IPv6 host */
gboolean form; /* URI originated from form */
gboolean is_phished; /* URI maybe phishing */
};
/*
 * Result codes reported by the URI parser. Values are contiguous and start
 * at zero, so URI_ERRNO_OK is the only "success" value.
 */
enum uri_errno {
    URI_ERRNO_OK = 0,                 /* URI was parsed successfully */
    URI_ERRNO_EMPTY = 1,              /* input string contained nothing */
    URI_ERRNO_INVALID_PROTOCOL = 2,   /* protocol part could not be found */
    URI_ERRNO_NO_SLASHES = 3,         /* protocol is not followed by slashes */
    URI_ERRNO_TOO_MANY_SLASHES = 4,   /* protocol is followed by too many slashes */
    URI_ERRNO_TRAILING_DOTS = 5,      /* host is followed by '.' */
    URI_ERRNO_NO_HOST = 6,            /* host part is absent */
    URI_ERRNO_NO_PORT_COLON = 7,      /* ':' follows the host but no port does */
    URI_ERRNO_NO_HOST_SLASH = 8,      /* slash after the host is absent */
    URI_ERRNO_IPV6_SECURITY = 9,      /* IPv6 security bug detected */
    URI_ERRNO_INVALID_PORT = 10,      /* port number is malformed */
    URI_ERRNO_INVALID_PORT_RANGE = 11 /* port number lies outside 0-65535 */
};
/*
 * Protocols (URI schemes) distinguished by the parser. PROTOCOL_UNKNOWN is
 * the last enumerator.
 */
enum protocol {
    PROTOCOL_FILE = 0,
    PROTOCOL_FTP = 1,
    PROTOCOL_HTTP = 2,
    PROTOCOL_HTTPS = 3,
    PROTOCOL_MAILTO = 4,
    PROTOCOL_UNKNOWN = 5
};
/* Yield the `string` member of the object that `u` points to. */
#define struri(u) ((u)->string)
/*
 * Parse urls inside text
 * @param pool memory pool
 * @param task task object
 * @param part current text part
 * @param is_html turn on html heuristic
 *
 * Returns nothing. NOTE(review): where the discovered urls are stored
 * (presumably on the task and/or part) is not visible from this header -
 * confirm against the implementation.
 */
void url_parse_text (rspamd_mempool_t *pool,
struct rspamd_task *task,
struct mime_text_part *part,
gboolean is_html);
/*
 * Parse a single url into an uri structure
 * @param uri url object, must be pre allocated
 * @param uristring text form of url (non-const; NOTE(review): the parser may
 *        modify or retain this buffer - confirm before passing shared data)
 * @param pool memory pool
 * @return URI_ERRNO_OK if parsing went well, otherwise the enum uri_errno
 *         value describing the failure
 */
enum uri_errno parse_uri (struct uri *uri,
gchar *uristring,
rspamd_mempool_t *pool);
/*
 * Try to extract url from a text
 * @param pool memory pool
 * @param begin begin of text
 * @param len length of text
 * @param start storage for start position of url found (or NULL)
 * @param end storage for end position of url found (or NULL)
 * @param url_str storage for url string(or NULL)
 * @param is_html NOTE(review): undocumented in the original; presumably
 *        enables the same html heuristic as in url_parse_text - confirm
 * @return TRUE if url is found in specified text
 */
gboolean url_try_text (rspamd_mempool_t *pool,
const gchar *begin,
gsize len,
gchar **start,
gchar **end,
gchar **url_str,
gboolean is_html);
/*
 * Return text representation of url parsing error
 * @param err error code, as returned by parse_uri
 * @return constant string describing the error; the caller must not modify it
 */
const gchar * url_strerror (enum uri_errno err);
/*
 * URL unescape characters in the specified string
 * @param s string to unescape. The argument is non-const and nothing is
 *        returned, so the result is presumably written back into s in
 *        place - NOTE(review): confirm against the implementation.
 */
void rspamd_url_unescape (gchar *s);
#endif /* URL_H */