aboutsummaryrefslogtreecommitdiffstats
path: root/url.h
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@rambler-co.ru>2008-06-11 18:34:33 +0400
committerVsevolod Stakhov <vsevolod@rambler-co.ru>2008-06-11 18:34:33 +0400
commit7cd13c464ff3e025a0ce70302dede40a1b2d3f29 (patch)
tree06d94746e6e6c962ff77eeddc977d7a433a9a3f3 /url.h
parent25395e554edc13f457e48d5fdf69095b3dbe5e17 (diff)
downloadrspamd-7cd13c464ff3e025a0ce70302dede40a1b2d3f29.tar.gz
rspamd-7cd13c464ff3e025a0ce70302dede40a1b2d3f29.zip
* Add initial version of the URL parser (PCRE still needs to be made to parse all pattern matches)
* Link with PCRE
Diffstat (limited to 'url.h')
-rw-r--r--url.h86
1 files changed, 86 insertions, 0 deletions
diff --git a/url.h b/url.h
new file mode 100644
index 000000000..7d9d87db1
--- /dev/null
+++ b/url.h
@@ -0,0 +1,86 @@
+/* URL check functions */
+#ifndef URL_H
+#define URL_H
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#ifndef OWN_QUEUE_H
+#include <sys/queue.h>
+#else
+#include "queue.h"
+#endif
+
+#include <glib.h>
+
+struct worker_task; /* forward declaration: this header only uses pointers to it */
+
+struct uri { /* One URI split into components: pointers, bit-field lengths, flags */
+ /* Start of the URI text, which is also the start of its protocol string. */
+ unsigned char *string;
+
+ /* Internal protocol type (see enum protocol). Must never be PROTOCOL_UNKNOWN. */
+ int protocol; /* enum protocol */
+
+ int ip_family; /* NOTE(review): presumably an AF_* address family -- confirm */
+
+ unsigned char *user; /* presumably sized by the matching *len bit-fields below -- confirm */
+ unsigned char *password;
+ unsigned char *host;
+ unsigned char *port;
+ /* @data may hold both the path and the query parts of the URI.
+ * It is never NULL, but it may have zero length. */
+ unsigned char *data;
+ unsigned char *fragment;
+ /* @post may hold specially encoded form data, used internally to make
+ * form-data handling more efficient. The data is marked by POST_CHAR
+ * in the uri string (POST_CHAR is not defined in this header). */
+ unsigned char *post; /* NOTE(review): there is no matching postlen bit-field */
+
+ /* @protocollen is only meaningful when @protocol is PROTOCOL_USER or
+ * when a URI string has to be composed. */
+ unsigned int protocollen:16; /* NOTE(review): PROTOCOL_USER is not in enum protocol */
+ unsigned int userlen:16;
+ unsigned int passwordlen:16;
+ unsigned int hostlen:16;
+ unsigned int portlen:8; /* only 8 bits wide (0-255), unlike the 16-bit lengths */
+ unsigned int datalen:16;
+ unsigned int fragmentlen:16;
+
+ /* Single-bit boolean flags */
+ unsigned int ipv6:1; /* URI contains IPv6 host */
+ unsigned int form:1; /* URI originated from form */
+
+ /* Tail-queue linkage (TAILQ_ENTRY from the queue header included above) */
+ TAILQ_ENTRY(uri) next;
+};
+
+enum uri_errno { /* URI parsing result codes; URI_ERRNO_OK is 0, the rest count up */
+ URI_ERRNO_OK, /* Parsing went well */
+ URI_ERRNO_EMPTY, /* The URI string was empty */
+ URI_ERRNO_INVALID_PROTOCOL, /* No protocol was found */
+ URI_ERRNO_NO_SLASHES, /* Slashes after the protocol are missing */
+ URI_ERRNO_TOO_MANY_SLASHES, /* Too many slashes after the protocol */
+ URI_ERRNO_TRAILING_DOTS, /* Trailing '.' after the host */
+ URI_ERRNO_NO_HOST, /* Host part is missing */
+ URI_ERRNO_NO_PORT_COLON, /* ':' after the host without a port */
+ URI_ERRNO_NO_HOST_SLASH, /* Slash after the host is missing */
+ URI_ERRNO_IPV6_SECURITY, /* IPv6 security bug detected */
+ URI_ERRNO_INVALID_PORT, /* Port number is bad */
+ URI_ERRNO_INVALID_PORT_RANGE, /* Port number is not within 0-65535 */
+};
+
+enum protocol { /* Values for struct uri.protocol, which is declared as plain int */
+ PROTOCOL_FILE,
+ PROTOCOL_FTP,
+ PROTOCOL_HTTP,
+ PROTOCOL_HTTPS,
+
+ PROTOCOL_UNKNOWN, /* last enumerator; struct uri.protocol must never hold it */
+};
+
+#define struri(uri) ((uri)->string) /* Start of the URI text for a struct uri pointer */
+
+void url_parse_html (struct worker_task *task, GByteArray *part); /* NOTE(review): presumably scans an HTML part for URLs -- confirm in url.c */
+void url_parse_text (struct worker_task *task, GByteArray *part); /* NOTE(review): presumably scans a plain-text part for URLs -- confirm in url.c */
+
+#endif