diff options
author | Vsevolod Stakhov <vsevolod@rambler-co.ru> | 2008-06-11 18:34:33 +0400 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@rambler-co.ru> | 2008-06-11 18:34:33 +0400 |
commit | 7cd13c464ff3e025a0ce70302dede40a1b2d3f29 (patch) | |
tree | 06d94746e6e6c962ff77eeddc977d7a433a9a3f3 /url.h | |
parent | 25395e554edc13f457e48d5fdf69095b3dbe5e17 (diff) | |
download | rspamd-7cd13c464ff3e025a0ce70302dede40a1b2d3f29.tar.gz rspamd-7cd13c464ff3e025a0ce70302dede40a1b2d3f29.zip |
* Add initial version of URLs parser (still need to make PCRE parse all pattern matches)
* Link with PCRE
Diffstat (limited to 'url.h')
-rw-r--r-- | url.h | 86 |
1 files changed, 86 insertions, 0 deletions
@@ -0,0 +1,86 @@
+/*
+ * URL check functions.
+ *
+ * Declares the parsed-URI record (struct uri), the parser status codes
+ * (enum uri_errno), the known protocol identifiers (enum protocol), the
+ * struri accessor macro and the two entry points that take a message part.
+ */
+#ifndef URL_H
+#define URL_H
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#ifndef OWN_QUEUE_H /* default: system queue macros (TAILQ_ENTRY) */
+#include <sys/queue.h>
+#else /* OWN_QUEUE_H defined: use the local queue.h instead */
+#include "queue.h"
+#endif /* OWN_QUEUE_H */
+
+#include <glib.h> /* GByteArray */
+
+struct worker_task; /* forward declaration; only used through pointers here */
+/*
+ * One URI split into its components.
+ *
+ * Components are stored as a pointer plus a length bitfield. @post has no
+ * length field, and @protocollen has no pointer of its own because the
+ * protocol begins at @string. The bitfield widths cap the representable
+ * lengths: 16 bits allow up to 65535, the 8-bit @portlen up to 255.
+ *
+ * NOTE(review): the component pointers presumably point into @string
+ * rather than owning separate storage -- confirm against the parser.
+ */
+struct uri {
+	/* The start of the URI (and thus start of the protocol string). */
+	unsigned char *string;
+
+	/* The internal type of protocol. Can _never_ be PROTOCOL_UNKNOWN. */
+	int protocol; /* enum protocol, stored as a plain int */
+
+	int ip_family; /* presumably an address family (AF_*) -- TODO confirm */
+
+	unsigned char *user; /* length in @userlen */
+	unsigned char *password; /* length in @passwordlen */
+	unsigned char *host; /* length in @hostlen */
+	unsigned char *port; /* kept as text; length in @portlen */
+	/* @data can contain both the path and query URI fields.
+	 * It can never be NULL but can have zero length.
+	 * Its length is kept in @datalen. */
+	unsigned char *data;
+	unsigned char *fragment; /* length in @fragmentlen */
+	/* @post can contain some special encoded form data, used internally
+	 * to make form data handling more efficient. The data is marked by
+	 * POST_CHAR in the URI string.
+	 * NOTE(review): POST_CHAR is not defined in this header. */
+	unsigned char *post;
+
+	/* @protocollen should only be usable if @protocol is either
+	 * PROTOCOL_USER or a URI string should be composed.
+	 * NOTE(review): PROTOCOL_USER is not among the enum protocol values
+	 * declared in this header. */
+	unsigned int protocollen:16;
+	unsigned int userlen:16;
+	unsigned int passwordlen:16;
+	unsigned int hostlen:16;
+	unsigned int portlen:8; /* narrower than the other length fields */
+	unsigned int datalen:16;
+	unsigned int fragmentlen:16;
+
+	/* Flags */
+	unsigned int ipv6:1; /* URI contains IPv6 host */
+	unsigned int form:1; /* URI originated from form */
+
+	/* Link: tail-queue linkage to other struct uri entries. The queue
+	 * head is not declared in this header. */
+	TAILQ_ENTRY(uri) next;
+};
+/*
+ * Status codes describing the outcome of parsing a URI. URI_ERRNO_OK is
+ * the first enumerator and therefore has the value 0. Neither function
+ * declared in this header returns one of these codes (both return void).
+ */
+enum uri_errno {
+	URI_ERRNO_OK, /* Parsing went well */
+	URI_ERRNO_EMPTY, /* The URI string was empty */
+	URI_ERRNO_INVALID_PROTOCOL, /* No protocol was found */
+	URI_ERRNO_NO_SLASHES, /* Slashes after protocol missing */
+	URI_ERRNO_TOO_MANY_SLASHES, /* Too many slashes after protocol */
+	URI_ERRNO_TRAILING_DOTS, /* '.' after host */
+	URI_ERRNO_NO_HOST, /* Host part is missing */
+	URI_ERRNO_NO_PORT_COLON, /* ':' after host without port */
+	URI_ERRNO_NO_HOST_SLASH, /* Slash after host missing */
+	URI_ERRNO_IPV6_SECURITY, /* IPv6 security bug detected */
+	URI_ERRNO_INVALID_PORT, /* Port number is bad */
+	URI_ERRNO_INVALID_PORT_RANGE, /* Port number is not within 0-65535 */
+};
+/*
+ * Protocol identifiers for the protocol member of struct uri.
+ * PROTOCOL_UNKNOWN is the last enumerator; according to the comment on
+ * that member it is never stored in a struct uri.
+ */
+enum protocol {
+	PROTOCOL_FILE,
+	PROTOCOL_FTP,
+	PROTOCOL_HTTP,
+	PROTOCOL_HTTPS,
+
+	PROTOCOL_UNKNOWN,
+};
+/* Shorthand for the string member (start of the URI) of a struct uri pointer. */
+#define struri(uri) ((uri)->string)
+/*
+ * Entry points that take a worker task and one part as a GByteArray; by
+ * their names, one is for HTML input and one for plain-text input. Both
+ * return void, so no status is reported to the caller through the return
+ * value.
+ * NOTE(review): presumably the URLs found are attached to @task -- the
+ * definitions are not in this header, confirm there.
+ */
+void url_parse_html (struct worker_task *task, GByteArray *part);
+void url_parse_text (struct worker_task *task, GByteArray *part);
+
+#endif /* URL_H */ |