source.dussan.org Git - rspamd.git/commitdiff
* Use glib logger and regexp matching functions
author Vsevolod Stakhov <vsevolod@rambler-co.ru>
Mon, 8 Sep 2008 15:29:34 +0000 (19:29 +0400)
committer Vsevolod Stakhov <vsevolod@rambler-co.ru>
Mon, 8 Sep 2008 15:29:34 +0000 (19:29 +0400)
main.c
main.h
test/rspamd_url_test.c
url.c

diff --git a/main.c b/main.c
index dc1907f85191d5b244a1a679ca00b832cde50def..0478599ce33666f558c84350bf233403af3b05ce 100644 (file)
--- a/main.c
+++ b/main.c
@@ -184,7 +184,6 @@ main (int argc, char **argv)
        rspamd->cfg->cfg_name = strdup (FIXED_CONFIG_FILE);
        read_cmd_line (argc, argv, rspamd->cfg);
 
-       openlog("rspamd", LOG_PID, LOG_MAIL);
     msg_warn ("(main) starting...");
 
        #ifndef HAVE_SETPROCTITLE
diff --git a/main.h b/main.h
index ddfbf959ff5044f66899b9b3841121aade5695ee..cedec5eb25aaf82e97d7964140cda7a61164274d 100644 (file)
--- a/main.h
+++ b/main.h
 #define SOFT_SHUTDOWN_TIME 60
 
 /* Logging in postfix style */
-#define msg_err(args...) syslog(LOG_ERR, ##args)
-#define msg_warn(args...)      syslog(LOG_WARNING, ##args)
-#define msg_info(args...)      syslog(LOG_INFO, ##args)
-#define msg_debug(args...) syslog(LOG_DEBUG, ##args)
+#define msg_err g_error
+#define msg_warn       g_warning
+#define msg_info       g_message
+#define msg_debug g_debug
 
 /* Process type: main or worker */
 enum process_type {
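diff --git a/test/rspamd_url_test.c b/test/rspamd_url_test.c
index 29be737e1b81839024f249ad1116f17b6a28f8f8..17565de80dd61e4320fdfcfed31995190afdf5d9 100644 (file)
--- a/test/rspamd_url_test.c
+++ b/test/rspamd_url_test.c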
@@ -33,13 +33,14 @@ rspamd_url_test_func ()
        text->data = (gchar *)test_text;
        text->len = sizeof (test_text);
        html = g_byte_array_new();
-       text->data = (gchar *)test_html;
-       text->len = sizeof (test_html);
+       html->data = (gchar *)test_html;
+       html->len = sizeof (test_html);
        bzero (&task, sizeof (task));
        TAILQ_INIT (&task.urls);
        
        g_test_timer_start ();
        g_test_message ("* Testing text URL regexp parser *");
+       g_test_message ("Passing string: %s", test_text);
        url_parse_text (&task, text);
 
        TAILQ_FOREACH (url, &task.urls, next) {
@@ -60,6 +61,7 @@ rspamd_url_test_func ()
        i = 0;
        g_test_timer_start ();
        g_test_message ("* Testing html URL regexp parser *");
+       g_test_message ("Passing string: %s", test_html);
        url_parse_html (&task, html);
 
        TAILQ_FOREACH (url, &task.urls, next) {
diff --git a/url.c b/url.c
index 2fb01f396aebe3da21a8228bda5a53b395b0ec49..97091c3e25b7509c889170776069ceb547c84204 100644 (file)
--- a/url.c
+++ b/url.c
@@ -2,7 +2,6 @@
 #include <stdlib.h>
 #include <ctype.h>
 #include <errno.h>
-#include <pcre.h>
 #include <syslog.h>
 #include <sys/socket.h>
 #include <arpa/inet.h>
@@ -36,13 +35,10 @@ struct _proto {
 static const char *html_url = "((?:href\\s*=\\s*)|(?:archive\\s*=\\s*)|(?:code\\s*=\\s*)|(?:codebase\\s*=\\s*)|(?:src\\s*=\\s*)|(?:cite\\s*=\\s*)"
 "|(:?background\\s*=\\s*)|(?:pluginspage\\s*=\\s*)|(?:pluginurl\\s*=\\s*)|(?:action\\s*=\\s*)|(?:dynsrc\\s*=\\s*)|(?:longdesc\\s*=\\s*)|(?:lowsrc\\s*=\\s*)|(?:usemap\\s*=\\s*))"
 "\\\"?([^>\"<]+)\\\"?";
-static const char *text_url = "((?:mailto\\:|(?:news|(?:ht|f)tp(?:s?))\\://){1}[^>\"<]+)";
+static const char *text_url = "((?:mailto\\:|(?:news|(?:ht|f)tp(?:s?))\\://){1}[^ ]+)";
 
 static short url_initialized = 0;
-static pcre_extra *text_re_extra;
-static pcre *text_re;
-static pcre_extra *html_re_extra;
-static pcre *html_re;
+GRegex *text_re, *html_re;
 
 static const struct _proto protocol_backends[] = {
        { "file",          0, NULL,             1, 0, 0, 0 },
@@ -160,20 +156,22 @@ check_uri_file(unsigned char *name)
 static int
 url_init (void)
 {
+       GError *err = NULL;
        if (url_initialized == 0) {
-               text_re = pcre_compile (text_url, PCRE_CASELESS, NULL, 0, NULL);
-               if (text_re == NULL) {
-                       msg_info ("url_init: cannot init url parsing regexp");
+               text_re = g_regex_new  (text_url, G_REGEX_CASELESS | G_REGEX_MULTILINE | G_REGEX_RAW, 0, &err);
+               if (err != NULL) {
+                       msg_info ("url_init: cannot init text url parsing regexp: %s", err->message);
+                       g_error_free (err);
                        return -1;
                }
-               text_re_extra = pcre_study (text_re, 0, NULL);
-               html_re = pcre_compile (html_url, PCRE_CASELESS, NULL, 0, NULL);
-               if (html_re == NULL) {
-                       msg_info ("url_init: cannot init url parsing regexp");
+               html_re = g_regex_new (html_url, G_REGEX_CASELESS | G_REGEX_MULTILINE | G_REGEX_RAW, 0, &err);
+               if (err != NULL) {
+                       msg_info ("url_init: cannot init html url parsing regexp: %s", err->message);
+                       g_error_free (err);
                        return -1;
                }
-               html_re_extra = pcre_study (html_re, 0, NULL);
                url_initialized = 1;
+               msg_debug ("url_init: url regexps initialized successfully, text regexp: /%s/, html_regexp: /%s/", text_url, html_url);
        }
 
        return 0;
@@ -874,53 +872,81 @@ normalize_uri(struct uri *uri, unsigned char *uristring)
 void 
 url_parse_text (struct worker_task *task, GByteArray *content)
 {
-       int ovec[30];
-       int pos = 0, rc;
+       GMatchInfo *info;
+       GError *err = NULL;
+       int pos = 0, start;
+       gboolean rc;
        char *url_str = NULL;
        struct uri *new;
 
        if (url_init () == 0) {
-               while ((rc = pcre_exec (text_re, text_re_extra, (const char *)content->data, content->len, pos, 0, 
-                                               ovec, sizeof (ovec) / sizeof (ovec[0])) >= 0)) {
-                       if (rc > 0) {
-                               pos = ovec[1];
-                               pcre_get_substring ((const char *)content->data, ovec, rc, 1, (const char **)&url_str);
-                               if (url_str != NULL) {
-                                       new = g_malloc (sizeof (struct uri));
-                                       if (new != NULL) {
-                                               parse_uri (new, url_str);
-                                               normalize_uri (new, url_str);
-                                               TAILQ_INSERT_TAIL (&task->urls, new, next);
+               do {
+                       rc = g_regex_match_full (text_re, (const char *)content->data, content->len, pos, 0, &info, &err);
+                       if (rc) {
+                               if (g_match_info_matches (info)) {
+                                       g_match_info_fetch_pos (info, 0, &start, &pos);
+                                       url_str = g_match_info_fetch (info, 1);
+                                       msg_debug ("url_parse_text: extracted string with regexp: '%s'", url_str);
+                                       if (url_str != NULL) {
+                                               new = g_malloc (sizeof (struct uri));
+                                               if (new != NULL) {
+                                                       parse_uri (new, url_str);
+                                                       normalize_uri (new, url_str);
+                                                       TAILQ_INSERT_TAIL (&task->urls, new, next);
+                                               }
                                        }
+                                       g_free (url_str);
                                }
+                               g_match_info_free (info);
                        }
-               } 
+                       else if (err != NULL) {
+                               msg_debug ("url_parse_text: error matching regexp: %s", err->message);
+                               g_free (err);
+                       }
+                       else {
+                               msg_debug ("url_parse_text: cannot find url pattern in given string");
+                       }
+               } while (rc > 0);
        }
 }
 
 void 
 url_parse_html (struct worker_task *task, GByteArray *content)
 {
-       int ovec[30];
-       int pos = 0, rc;
+       GMatchInfo *info;
+       GError *err = NULL;
+       int pos = 0, start;
+       gboolean rc;
        char *url_str = NULL;
        struct uri *new;
 
        if (url_init () == 0) {
-               while ((rc = pcre_exec (html_re, html_re_extra, (const char *)content->data, content->len, pos, 0, 
-                                               ovec, sizeof (ovec) / sizeof (ovec[0])) >= 0)) {
-                       if (rc > 0) {
-                               pos = ovec[1];
-                               pcre_get_substring ((const char *)content->data, ovec, rc, 3, (const char **)&url_str);
-                               if (url_str != NULL) {
-                                       new = g_malloc (sizeof (struct uri));
-                                       if (new != NULL) {
-                                               parse_uri (new, url_str);
-                                               normalize_uri (new, url_str);
-                                               TAILQ_INSERT_TAIL (&task->urls, new, next);
+               do {
+                       rc = g_regex_match_full (html_re, (const char *)content->data, content->len, pos, 0, &info, &err);
+                       if (rc) {
+                               if (g_match_info_matches (info)) {
+                                       g_match_info_fetch_pos (info, 0, &start, &pos);
+                                       url_str = g_match_info_fetch (info, 3);
+                                       msg_debug ("url_parse_html: extracted string with regexp: '%s'", url_str);
+                                       if (url_str != NULL) {
+                                               new = g_malloc (sizeof (struct uri));
+                                               if (new != NULL) {
+                                                       parse_uri (new, url_str);
+                                                       normalize_uri (new, url_str);
+                                                       TAILQ_INSERT_TAIL (&task->urls, new, next);
+                                               }
                                        }
+                                       g_free (url_str);
                                }
+                               g_match_info_free (info);
                        }
-               }
+                       else if (err) {
+                               msg_debug ("url_parse_html: error matching regexp: %s", err->message);
+                               g_free (err);
+                       }
+                       else {
+                               msg_debug ("url_parse_html: cannot find url pattern in given string");
+                       }
+               } while (rc > 0);
        }
 }