diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2014-04-21 16:25:51 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2014-04-21 16:25:51 +0100 |
commit | 61555065f3d1c8badcc9573691232f1b6e42988c (patch) | |
tree | 563d5b7cb8c468530f7e79c4da0a75267b1184e1 /src/libserver/url.h | |
parent | ad5bf825b7f33bc10311673991f0cc888e69c0b1 (diff) | |
download | rspamd-61555065f3d1c8badcc9573691232f1b6e42988c.tar.gz rspamd-61555065f3d1c8badcc9573691232f1b6e42988c.zip |
Rework project structure, remove trash files.
Diffstat (limited to 'src/libserver/url.h')
-rw-r--r-- | src/libserver/url.h | 111 |
1 files changed, 111 insertions, 0 deletions
diff --git a/src/libserver/url.h b/src/libserver/url.h
new file mode 100644
index 000000000..60535ba5c
--- /dev/null
+++ b/src/libserver/url.h
@@ -0,0 +1,111 @@
+/* URL check functions: the parsed uri structure, uri parsing error codes
+ * and the declarations of the url parsing/extraction functions. */
+#ifndef URL_H
+#define URL_H
+
+#include "config.h"
+#include "mem_pool.h"
+
+struct rspamd_task;
+struct mime_text_part;
+
+struct uri {
+	/* The start of the uri (and thus start of the protocol string). */
+	gchar *string;
+
+	/* The internal type of protocol. Can _never_ be PROTOCOL_UNKNOWN.
+	 * Stored as gint; the values are those of enum protocol declared
+	 * further down in this header.
+	 * NOTE(review): enum protocol does contain PROTOCOL_UNKNOWN, so the
+	 * "never" above is a promise made by the parser, not by the type. */
+	gint protocol; /* enum protocol */
+
+	gint ip_family; /* NOTE(review): presumably the address family of an IP host - confirm */
+
+	gchar *user;
+	gchar *password;
+	gchar *host;
+	gchar *port;
+	/* @data can contain both the path and query uri fields.
+	 * It can never be NULL but can have zero length. */
+	gchar *data;
+	gchar *fragment;
+	/* @post can contain some special encoded form data, used internally
+	 * to make form data handling more efficient. The data is marked by
+	 * POST_CHAR in the uri string.
+	 * NOTE(review): POST_CHAR is not defined in this header. */
+	gchar *post;
+
+	struct uri *phished_url; /* NOTE(review): presumably related to the is_phished flag below - confirm */
+
+	/* @protocollen should only be usable if @protocol is either
+	 * PROTOCOL_USER or an uri string should be composed.
+	 * NOTE(review): PROTOCOL_USER is not among the enum protocol values
+	 * declared in this header.
+	 * The *len fields below are named after the string fields above;
+	 * presumably each one holds the length of its counterpart - confirm. */
+	guint protocollen;
+	guint userlen;
+	guint passwordlen;
+	guint hostlen;
+	guint portlen;
+	guint datalen;
+	guint fragmentlen;
+
+	/* Flags */
+	gboolean ipv6; /* URI contains IPv6 host */
+	gboolean form; /* URI originated from form */
+	gboolean is_phished; /* URI may be phishing */
+};
+
+enum uri_errno { /* Status codes returned by parse_uri() and accepted by url_strerror() */
+	URI_ERRNO_OK, /* Parsing went well */
+	URI_ERRNO_EMPTY, /* The URI string was empty */
+	URI_ERRNO_INVALID_PROTOCOL, /* No protocol was found */
+	URI_ERRNO_NO_SLASHES, /* Slashes after protocol missing */
+	URI_ERRNO_TOO_MANY_SLASHES, /* Too many slashes after protocol */
+	URI_ERRNO_TRAILING_DOTS, /* '.' after host */
+	URI_ERRNO_NO_HOST, /* Host part is missing */
+	URI_ERRNO_NO_PORT_COLON, /* ':' after host without port */
+	URI_ERRNO_NO_HOST_SLASH, /* Slash after host missing */
+	URI_ERRNO_IPV6_SECURITY, /* IPv6 security bug detected */
+	URI_ERRNO_INVALID_PORT, /* Port number is bad */
+	URI_ERRNO_INVALID_PORT_RANGE /* Port number is not within 0-65535 */
+};
+
+enum protocol { /* Values for the protocol field of struct uri */
+	PROTOCOL_FILE,
+	PROTOCOL_FTP,
+	PROTOCOL_HTTP,
+	PROTOCOL_HTTPS,
+	PROTOCOL_MAILTO,
+	PROTOCOL_UNKNOWN
+};
+
+#define struri(uri) ((uri)->string) /* Expands to the string field of a struct uri pointer */
+
+/*
+ * Parse urls inside text
+ * @param pool memory pool
+ * @param task task object
+ * @param part current text part
+ * @param is_html turn on html heuristic
+ */
+void url_parse_text (rspamd_mempool_t *pool, struct rspamd_task *task, struct mime_text_part *part, gboolean is_html);
+
+/*
+ * Parse a single url into an uri structure
+ * @param uri url object, must be pre allocated
+ * @param uristring text form of url
+ * @param pool memory pool
+ * @return parsing status; URI_ERRNO_OK means parsing went well
+ *         (see enum uri_errno for the other codes)
+ */
+enum uri_errno parse_uri(struct uri *uri, gchar *uristring, rspamd_mempool_t *pool);
+
+/*
+ * Try to extract url from a text
+ * @param pool memory pool
+ * @param begin begin of text
+ * @param len length of text
+ * @param start storage for start position of url found (or NULL)
+ * @param end storage for end position of url found (or NULL)
+ * @param url_str storage for url string (or NULL)
+ * @param is_html not documented originally; presumably turns on the html
+ *        heuristic as in url_parse_text - confirm against the implementation
+ * @return TRUE if url is found in specified text
+ */
+gboolean url_try_text (rspamd_mempool_t *pool, const gchar *begin, gsize len, gchar **start, gchar **end, gchar **url_str, gboolean is_html);
+
+/*
+ * Return text representation of url parsing error
+ * @param err url parsing error code (enum uri_errno)
+ * @return text representation of err
+ */
+const gchar* url_strerror (enum uri_errno err);
+
+#endif |