aboutsummaryrefslogtreecommitdiffstats
path: root/src/libserver/url.h
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@highsecure.ru>2014-04-21 16:25:51 +0100
committerVsevolod Stakhov <vsevolod@highsecure.ru>2014-04-21 16:25:51 +0100
commit61555065f3d1c8badcc9573691232f1b6e42988c (patch)
tree563d5b7cb8c468530f7e79c4da0a75267b1184e1 /src/libserver/url.h
parentad5bf825b7f33bc10311673991f0cc888e69c0b1 (diff)
downloadrspamd-61555065f3d1c8badcc9573691232f1b6e42988c.tar.gz
rspamd-61555065f3d1c8badcc9573691232f1b6e42988c.zip
Rework project structure, remove trash files.
Diffstat (limited to 'src/libserver/url.h')
-rw-r--r--src/libserver/url.h111
1 files changed, 111 insertions, 0 deletions
diff --git a/src/libserver/url.h b/src/libserver/url.h
new file mode 100644
index 000000000..60535ba5c
--- /dev/null
+++ b/src/libserver/url.h
@@ -0,0 +1,111 @@
+/* URL check functions */
+#ifndef URL_H
+#define URL_H
+
+#include "config.h"
+#include "mem_pool.h"
+
+struct rspamd_task;
+struct mime_text_part;
+
+struct uri { /* One parsed URI: component strings, their lengths and flags */
+ /* The start of the uri (and thus start of the protocol string). */
+ gchar *string;
+
+ /* The internal type of protocol. Can _never_ be PROTOCOL_UNKNOWN. */
+ gint protocol; /* enum protocol */
+
+ gint ip_family; /* NOTE(review): presumably an address family for IP hosts; confirm */
+
+ gchar *user;
+ gchar *password;
+ gchar *host;
+ gchar *port;
+ /* @data can contain both the path and query uri fields.
+ * It can never be NULL but can have zero length. */
+ gchar *data;
+ gchar *fragment;
+ /* @post can contain some special encoded form data, used internally
+ * to make form data handling more efficient. The data is marked by
+ * POST_CHAR in the uri string. */
+ gchar *post;
+
+ struct uri *phished_url; /* NOTE(review): presumably related to is_phished; confirm */
+
+ /* @protocollen is meant to be used only when @protocol is PROTOCOL_USER
+ * or when an uri string is being composed. */
+ guint protocollen; /* NOTE(review): PROTOCOL_USER is not declared in this header's enum protocol */
+ guint userlen; /* the *len fields presumably hold the length of the like-named string; confirm */
+ guint passwordlen;
+ guint hostlen;
+ guint portlen;
+ guint datalen;
+ guint fragmentlen;
+
+ /* Flags */
+ gboolean ipv6; /* URI contains IPv6 host */
+ gboolean form; /* URI originated from form */
+ gboolean is_phished; /* URI may be a phishing attempt */
+};
+
+enum uri_errno { /* Result codes returned by parse_uri() and accepted by url_strerror() */
+ URI_ERRNO_OK, /* Parsing went well */
+ URI_ERRNO_EMPTY, /* The URI string was empty */
+ URI_ERRNO_INVALID_PROTOCOL, /* No protocol was found */
+ URI_ERRNO_NO_SLASHES, /* Slashes after protocol missing */
+ URI_ERRNO_TOO_MANY_SLASHES, /* Too many slashes after protocol */
+ URI_ERRNO_TRAILING_DOTS, /* '.' after host */
+ URI_ERRNO_NO_HOST, /* Host part is missing */
+ URI_ERRNO_NO_PORT_COLON, /* ':' after host without port */
+ URI_ERRNO_NO_HOST_SLASH, /* Slash after host missing */
+ URI_ERRNO_IPV6_SECURITY, /* IPv6 security bug detected */
+ URI_ERRNO_INVALID_PORT, /* Port number is bad */
+ URI_ERRNO_INVALID_PORT_RANGE /* Port number is not within 0-65535 */
+};
+
+enum protocol { /* Protocol kinds; struct uri keeps one of these in its gint protocol field */
+ PROTOCOL_FILE,
+ PROTOCOL_FTP,
+ PROTOCOL_HTTP,
+ PROTOCOL_HTTPS,
+ PROTOCOL_MAILTO,
+ PROTOCOL_UNKNOWN /* NOTE(review): struct uri's comment says its protocol field never holds this value */
+};
+
+#define struri(uri) ((uri)->string) /* Shorthand for the string member of a struct uri pointer */
+
+/*
+ * Parse urls found inside a text part
+ * @param pool memory pool
+ * @param task task object
+ * @param part current text part
+ * @param is_html turn on html heuristics
+ */
+void url_parse_text (rspamd_mempool_t *pool, struct rspamd_task *task, struct mime_text_part *part, gboolean is_html);
+
+/*
+ * Parse a single url into an uri structure
+ * @param uri url object, must be pre allocated
+ * @param uristring text form of url
+ * @param pool memory pool
+ * @return enum uri_errno code describing the parse result (URI_ERRNO_OK: parsing went well) */
+enum uri_errno parse_uri(struct uri *uri, gchar *uristring, rspamd_mempool_t *pool);
+
+/*
+ * Try to extract url from a text
+ * @param pool memory pool
+ * @param begin begin of text
+ * @param len length of text
+ * @param start storage for start position of url found (or NULL)
+ * @param end storage for end position of url found (or NULL)
+ * @param url_str storage for url string (or NULL)
+ * @param is_html presumably turns on html heuristics as in url_parse_text (TODO confirm)
+ * @return TRUE if url is found in specified text */
+gboolean url_try_text (rspamd_mempool_t *pool, const gchar *begin, gsize len, gchar **start, gchar **end, gchar **url_str, gboolean is_html);
+
+/*
+ * Return text representation of url parsing error
+ * @param err error code of type enum uri_errno, as returned by parse_uri */
+const gchar* url_strerror (enum uri_errno err);
+
+#endif