* Fixes to fuzzy hashing logic, skip urls while estimating fuzzy hash

author Vsevolod Stakhov <vsevolod@rambler-co.ru>

Thu, 23 Jun 2011 15:05:58 +0000 (19:05 +0400)

committer Vsevolod Stakhov <vsevolod@rambler-co.ru>

Thu, 23 Jun 2011 15:05:58 +0000 (19:05 +0400)
author Vsevolod Stakhov <vsevolod@rambler-co.ru>
Thu, 23 Jun 2011 15:05:58 +0000 (19:05 +0400)
committer Vsevolod Stakhov <vsevolod@rambler-co.ru>
Thu, 23 Jun 2011 15:05:58 +0000 (19:05 +0400)
diff --git a/src/expressions.c b/src/expressions.c

index e590ad6302cb2e760a87340943e056b7fd344252..fa6ce0fefa578469770e9a05b7ffa758e4fdc8ca 100644 (file)
--- a/src/expressions.c
+++ b/src/expressions.c
@@ -1059,7 +1059,7 @@ rspamd_parts_distance (struct worker_task * task, GList * args, void *unused)
                         return FALSE;
                 }
                 if (!p1->is_empty && !p2->is_empty) {
-                       diff = fuzzy_compare_hashes (p1->fuzzy, p2->fuzzy);
+                       diff = fuzzy_compare_parts (p1, p2);
                         debug_task ("got likeliness between parts of %d%%, threshold is %d%%", diff, threshold);
                         if (diff <= threshold) {
                                 return TRUE;
diff --git a/src/fuzzy.c b/src/fuzzy.c

index 61ef5647e0d9b01d4f0132f799337146a8f02b1a..ce5217b5b65616ff3402dce8f68502550e6e4d81 100644 (file)
--- a/src/fuzzy.c
+++ b/src/fuzzy.c
@@ -27,6 +27,9 @@
  #include "mem_pool.h"
  #include "fstring.h"
  #include "fuzzy.h"
+#include "message.h"
+#include "url.h"
+#include "main.h"
  
  #define ROLL_WINDOW_SIZE 9
  #define MIN_FUZZY_BLOCK_SIZE 3
@@ -81,16 +84,17 @@ fuzzy_fnv_hash (gchar c, guint32 hval)
  static                          guint32
  fuzzy_blocksize (guint32 len)
  {
+       guint32                     nlen = MIN_FUZZY_BLOCK_SIZE;
  
-       if (len < MIN_FUZZY_BLOCK_SIZE) {
-               return MIN_FUZZY_BLOCK_SIZE;
+       while (nlen * (FUZZY_HASHLEN - 1) < len) {
+               nlen *= 2;
         }
-       return g_spaced_primes_closest (len / FUZZY_HASHLEN);
+       return nlen;
  }
  
  
  /* Update hash with new symbol */
-void
+static void
  fuzzy_update (fuzzy_hash_t * h, gchar c)
  {
         h->rh = fuzzy_roll_hash (c);
@@ -105,6 +109,30 @@ fuzzy_update (fuzzy_hash_t * h, gchar c)
         }
  }
  
+/*
+ * Update two hashes with the same symbol.
+ * The rolling hash is computed once for h1 and copied to h2, while each
+ * hash keeps its own running value and is checked against its own block size.
+ */
+static void
+fuzzy_update2 (fuzzy_hash_t * h1, fuzzy_hash_t *h2, gchar c)
+{
+       fuzzy_hash_t                   *pair[2];
+       fuzzy_hash_t                   *cur;
+       guint                           n;
+
+       /* Mix the symbol into both hashes before any block boundary is tested */
+       h1->rh = fuzzy_roll_hash (c);
+       h1->h = fuzzy_fnv_hash (c, h1->h);
+       h2->rh = h1->rh;
+       h2->h = fuzzy_fnv_hash (c, h2->h);
+
+       pair[0] = h1;
+       pair[1] = h2;
+
+       for (n = 0; n < 2; n++) {
+               cur = pair[n];
+
+               if (cur->rh % cur->block_size != (cur->block_size - 1)) {
+                       /* No block boundary for this hash at this symbol */
+                       continue;
+               }
+
+               cur->hash_pipe[cur->hi] = b64[cur->h % 64];
+               /* Once hi stops advancing, later boundaries overwrite the same slot */
+               if (cur->hi < FUZZY_HASHLEN - 2) {
+                       cur->h = HASH_INIT;
+                       cur->hi++;
+               }
+       }
+}
+
  /*
   * Levenshtein distance between string1 and string2.
   *
@@ -284,6 +312,90 @@ fuzzy_init_byte_array (GByteArray * in, memory_pool_t * pool)
         return fuzzy_init (&f, pool);
  }
  
+/**
+ * Calculate fuzzy hashes for a text part, leaving url spans out of the hash
+ *
+ * The first pass counts significant characters to choose the block size, the
+ * second pass feeds the same characters to two hashes: part->fuzzy with the
+ * estimated block size and part->double_fuzzy with twice that size.
+ * ASCII whitespace, ASCII punctuation and the spans listed in
+ * part->urls_offset are skipped in both passes.
+ *
+ * @param part text part; content and urls_offset are read, fuzzy and double_fuzzy are set
+ * @param pool memory pool that both hash structures are allocated from
+ */
+void
+fuzzy_init_part (struct mime_text_part *part, memory_pool_t *pool)
+{
+       fuzzy_hash_t                   *new, *new2;
+       const gchar                    *data = (const gchar *)part->content->data;
+       gsize                           i, skipped, real_len = 0, len = part->content->len;
+       GList                          *cur_offset;
+       struct uri                     *cur_url;
+       GString                        *debug;
+
+       new = memory_pool_alloc0 (pool, sizeof (fuzzy_hash_t));
+       new2 = memory_pool_alloc0 (pool, sizeof (fuzzy_hash_t));
+       /* rs is declared outside this function (presumably the rolling hash state) */
+       bzero (&rs, sizeof (rs));
+
+       /*
+        * First pass: count characters that will be hashed.
+        * NOTE(review): assumes urls_offset is ordered by pos and that pos is
+        * relative to part->content - TODO confirm for html parts.
+        */
+       cur_offset = part->urls_offset;
+       cur_url = (cur_offset != NULL) ? cur_offset->data : NULL;
+
+       for (i = 0; i < len;) {
+               if (cur_url != NULL && cur_url->pos == i) {
+                       /* A url span covers len + 1 bytes starting at pos */
+                       i += cur_url->len + 1;
+                       cur_offset = g_list_next (cur_offset);
+                       /* Drop the pointer once the list is exhausted instead of keeping a stale url */
+                       cur_url = (cur_offset != NULL) ? cur_offset->data : NULL;
+               }
+               else {
+                       if (!g_ascii_isspace (data[i]) && !g_ascii_ispunct (data[i])) {
+                               real_len ++;
+                       }
+                       i++;
+               }
+       }
+
+       debug = g_string_sized_new (real_len);
+
+       new->block_size = fuzzy_blocksize (real_len);
+       new2->block_size = new->block_size * 2;
+
+       /* Second pass: hash the same characters that were counted above */
+       cur_offset = part->urls_offset;
+       cur_url = (cur_offset != NULL) ? cur_offset->data : NULL;
+
+       for (i = 0; i < len;) {
+               if (cur_url != NULL && cur_url->pos == i) {
+                       /* Remember the length of the url being skipped before moving to the next one */
+                       skipped = cur_url->len;
+                       i += skipped + 1;
+                       cur_offset = g_list_next (cur_offset);
+                       cur_url = (cur_offset != NULL) ? cur_offset->data : NULL;
+                       /* Cast so that the argument matches the %d conversion */
+                       msg_info ("skip url block of %d symbols", (gint)skipped);
+               }
+               else {
+                       if (!g_ascii_isspace (data[i]) && !g_ascii_ispunct (data[i])) {
+                               fuzzy_update2 (new, new2, data[i]);
+                               g_string_append_c (debug, data[i]);
+                       }
+                       i++;
+               }
+       }
+
+       msg_info ("make hash of string: %v", debug);
+       /* The debug copy is needed only for the log line above */
+       g_string_free (debug, TRUE);
+
+       /* Check whether we have more bytes in a rolling window */
+       if (new->rh != 0) {
+               new->hash_pipe[new->hi] = b64[new->h % 64];
+       }
+       if (new2->rh != 0) {
+               new2->hash_pipe[new2->hi] = b64[new2->h % 64];
+       }
+
+       part->fuzzy = new;
+       part->double_fuzzy = new2;
+}
+
  /* Compare score of difference between two hashes 0 - different hashes, 100 - identical hashes */
  gint
  fuzzy_compare_hashes (fuzzy_hash_t * h1, fuzzy_hash_t * h2)
@@ -308,6 +420,22 @@ fuzzy_compare_hashes (fuzzy_hash_t * h1, fuzzy_hash_t * h2)
         return res;
  }
  
+/*
+ * Compare two text parts using a pair of their hashes with equal block size.
+ * The plain hashes are tried first, then the double hash of p1 against the
+ * plain hash of p2, then the double hash of p2 against the plain hash of p1.
+ * Returns the result of fuzzy_compare_hashes for the first matching pair
+ * or 0 if no pair has equal block sizes.
+ */
+gint
+fuzzy_compare_parts (struct mime_text_part *p1, struct mime_text_part *p2)
+{
+       fuzzy_hash_t                   *lhs = NULL, *rhs = NULL;
+
+       if (p1->fuzzy->block_size == p2->fuzzy->block_size) {
+               lhs = p1->fuzzy;
+               rhs = p2->fuzzy;
+       }
+       else if (p1->double_fuzzy->block_size == p2->fuzzy->block_size) {
+               lhs = p1->double_fuzzy;
+               rhs = p2->fuzzy;
+       }
+       else if (p2->double_fuzzy->block_size == p1->fuzzy->block_size) {
+               lhs = p2->double_fuzzy;
+               rhs = p1->fuzzy;
+       }
+
+       if (lhs == NULL) {
+               /* No pair of hashes with comparable block sizes */
+               return 0;
+       }
+
+       return fuzzy_compare_hashes (lhs, rhs);
+}
+
  /* 
   * vi:ts=4 
   */
diff --git a/src/fuzzy.h b/src/fuzzy.h

index b5b3856e62335738961abf62684aff2e91794236..271bfee2a4ec9243afb068166290af96d25caf5e 100644 (file)
--- a/src/fuzzy.h
+++ b/src/fuzzy.h
@@ -20,6 +20,8 @@ typedef struct fuzzy_hash_s {
         guint32 hi;                                                     /**< current index in hash pipe         */
  } fuzzy_hash_t;
  
+struct mime_text_part;
+
  /**
   * Calculate fuzzy hash for specified string
   * @param in input string
@@ -28,6 +30,9 @@ typedef struct fuzzy_hash_s {
   */
  fuzzy_hash_t * fuzzy_init (f_str_t *in, memory_pool_t *pool);
  fuzzy_hash_t * fuzzy_init_byte_array (GByteArray *in, memory_pool_t *pool);
+void fuzzy_init_part (struct mime_text_part *part, memory_pool_t *pool);
+
+gint fuzzy_compare_parts (struct mime_text_part *p1, struct mime_text_part *p2);
  
  /**
   * Compare score of difference between two hashes 
diff --git a/src/html.c b/src/html.c

index e686570a0babcdcb8dbafeb1f22ea393d113378e..3582022f8db34d286d7db7cab175a3853cba3274 100644 (file)
--- a/src/html.c
+++ b/src/html.c
@@ -687,7 +687,7 @@ check_phishing (struct worker_task *task, struct uri *href_url, const gchar *url
         gchar                           tagbuf[128];
         struct html_tag                *tag;
         gsize                           len = 0;
-       gint                            off, rc;
+       gint                            rc;
  
         p = url_text;
         while (len < remain) {
@@ -719,7 +719,7 @@ check_phishing (struct worker_task *task, struct uri *href_url, const gchar *url
                 p ++;
         }
  
-       if (url_try_text (task->task_pool, url_text, len, &off, &url_str) && url_str != NULL) {
+       if (url_try_text (task->task_pool, url_text, len, NULL, NULL, &url_str) && url_str != NULL) {
                 new = memory_pool_alloc0 (task->task_pool, sizeof (struct uri));
                 if (new != NULL) {
                         g_strstrip (url_str);
@@ -864,13 +864,10 @@ parse_tag_url (struct worker_task *task, struct mime_text_part *part, tag_id_t i
                         /*
                          * Check for phishing
                          */
-                       if ((p = strchr (c, '>')) != NULL ) {
+                       if ((p = strchr (c, '>')) != NULL && id == Tag_A) {
                                 p ++;
                                 check_phishing (task, url, p, remain - (p - tag_text), id);
                         }
-                       if (part->html_urls && g_tree_lookup (part->html_urls, url_text) == NULL) {
-                               g_tree_insert (part->html_urls, url_text, url);
-                       }
                         if (g_tree_lookup (task->urls, url) == NULL) {
                                 g_tree_insert (task->urls, url, url);
                         }
@@ -938,7 +935,8 @@ add_html_node (struct worker_task *task, memory_pool_t * pool, struct mime_text_
                         /* Skip some tags */
                         if (data->tag && (data->tag->id == Tag_STYLE ||
                                                           data->tag->id == Tag_SCRIPT ||
-                                                         data->tag->id == Tag_OBJECT)) {
+                                                         data->tag->id == Tag_OBJECT ||
+                                                         data->tag->id == Tag_TITLE)) {
                                 return FALSE;
                         }
                 }
diff --git a/src/message.c b/src/message.c

index 8d36ad3ebea999ee3a16ef5bae80904e033823ed..0586be8d7305e7064e31742e8915669cf9d6f3e2 100644 (file)
--- a/src/message.c
+++ b/src/message.c
@@ -784,9 +784,6 @@ process_text_part (struct worker_task *task, GByteArray *part_content, GMimeCont
                 text_part->html_nodes = NULL;
                 text_part->parent = parent;
  
-               text_part->html_urls = g_tree_new ((GCompareFunc) g_ascii_strcasecmp);
-               text_part->urls = g_tree_new ((GCompareFunc) g_ascii_strcasecmp);
-
                 text_part->content = strip_html_tags (task, task->task_pool, text_part, text_part->orig, NULL);
  
                 if (text_part->html_nodes == NULL) {
@@ -800,10 +797,8 @@ process_text_part (struct worker_task *task, GByteArray *part_content, GMimeCont
  #endif
                 }
  
-               text_part->fuzzy = fuzzy_init_byte_array (text_part->content, task->task_pool);
+               fuzzy_init_part (text_part, task->task_pool);
                 memory_pool_add_destructor (task->task_pool, (pool_destruct_func) free_byte_array_callback, text_part->content);
-               memory_pool_add_destructor (task->task_pool, (pool_destruct_func) g_tree_destroy, text_part->html_urls);
-               memory_pool_add_destructor (task->task_pool, (pool_destruct_func) g_tree_destroy, text_part->urls);
                 task->text_parts = g_list_prepend (task->text_parts, text_part);
         }
         else if (g_mime_content_type_is_type (type, "text", "*")) {
@@ -821,12 +816,9 @@ process_text_part (struct worker_task *task, GByteArray *part_content, GMimeCont
                 }
                 text_part->orig = convert_text_to_utf (task, part_content, type, text_part);
                 text_part->content = text_part->orig;
-               text_part->fuzzy = fuzzy_init_byte_array (text_part->content, task->task_pool);
-               text_part->html_urls = NULL;
-               text_part->urls = g_tree_new ((GCompareFunc) g_ascii_strcasecmp);
                 url_parse_text (task->task_pool, task, text_part, FALSE);
+               fuzzy_init_part (text_part, task->task_pool);
                 task->text_parts = g_list_prepend (task->text_parts, text_part);
-               memory_pool_add_destructor (task->task_pool, (pool_destruct_func) g_tree_destroy, text_part->urls);
         }
  }
  
@@ -973,10 +965,10 @@ process_message (struct worker_task *task)
         GMimePart                      *part;
         GMimeDataWrapper               *wrapper;
         struct received_header         *recv;
-       gchar                          *mid, *url_str, *p, *end;
+       gchar                          *mid, *url_str, *p, *end, *url_end;
         struct uri                     *subject_url;
         gsize                           len;
-       gint                            pos, rc;
+       gint                            rc;
  
         tmp = memory_pool_alloc (task->task_pool, sizeof (GByteArray));
         tmp->data = task->msg->begin;
@@ -1127,7 +1119,7 @@ process_message (struct worker_task *task)
  
                 while (p < end) {
                         /* Search to the end of url */
-                       if (url_try_text (task->task_pool, p, end - p, &pos, &url_str)) {
+                       if (url_try_text (task->task_pool, p, end - p, NULL, &url_end, &url_str)) {
                                 if (url_str != NULL) {
                                         subject_url = memory_pool_alloc0 (task->task_pool, sizeof (struct uri));
                                         if (subject_url != NULL) {
@@ -1150,7 +1142,7 @@ process_message (struct worker_task *task)
                         else {
                                 break;
                         }
-                       p += pos;
+                       p = url_end + 1;
                 }
                 /* Free header's list */
                 g_list_free (cur);
diff --git a/src/message.h b/src/message.h

index e70dd07e2e7f59df6fe7feab0af4d861ed5260b7..5f19ab892190e7e97f352dba873b5c95ea46087b 100644 (file)
--- a/src/message.h
+++ b/src/message.h
@@ -30,9 +30,9 @@ struct mime_text_part {
         GByteArray *orig;
         GByteArray *content;
         GNode *html_nodes;
-       GTree *urls;
-       GTree *html_urls;
+       GList *urls_offset;                                                                                 /**< list of offsets of urls                                                */
         fuzzy_hash_t *fuzzy;
+       fuzzy_hash_t *double_fuzzy;
         GMimeObject *parent;
  };
  
diff --git a/src/plugins/fuzzy_check.c b/src/plugins/fuzzy_check.c

index 3f068ed9901221f15c47d7ac6abb53eeba32a7b3..93ba4bf257ed1d8c091ed6326da03160435bfcdd 100644 (file)
--- a/src/plugins/fuzzy_check.c
+++ b/src/plugins/fuzzy_check.c
@@ -686,6 +686,7 @@ fuzzy_symbol_callback (struct worker_task *task, void *unused)
                 }
  
                 register_fuzzy_call (task, part->fuzzy);
+               register_fuzzy_call (task, part->double_fuzzy);
  
                 cur = g_list_next (cur);
         }
@@ -843,6 +844,16 @@ fuzzy_process_handler (struct controller_session *session, f_str_t * in)
                                 free_task (task, FALSE);
                                 return;
                         }
+                       if (! register_fuzzy_controller_call (session, task, part->double_fuzzy, cmd, value, flag, saved)) {
+                               /* Cannot write hash */
+                               session->state = STATE_REPLY;
+                               r = rspamd_snprintf (out_buf, sizeof (out_buf), "cannot write fuzzy hash" CRLF "END" CRLF);
+                               if (! rspamd_dispatcher_write (session->dispatcher, out_buf, r, FALSE, FALSE)) {
+                                       return;
+                               }
+                               free_task (task, FALSE);
+                               return;
+                       }
                         cur = g_list_next (cur);
                 }
                 /* Process images */
diff --git a/src/plugins/regexp.c b/src/plugins/regexp.c

index 441a17de5b80ee787582e613cd44f84654598c86..a82c6011075649155ccd83aa0cd2fd4da56f13b2 100644 (file)
--- a/src/plugins/regexp.c
+++ b/src/plugins/regexp.c
@@ -845,31 +845,16 @@ process_regexp (struct rspamd_regexp *re, struct worker_task *task, const gchar
                         /*XXX: add support of it */
                         msg_warn ("numbered matches are not supported for url regexp");
                 }
-               cur = g_list_first (task->text_parts);
-               while (cur) {
-                       part = (struct mime_text_part *)cur->data;
-                       /* Skip empty parts */
-                       if (part->is_empty) {
-                               cur = g_list_next (cur);
-                               continue;
-                       }
-                       if (part->is_raw) {
-                               regexp = re->raw_regexp;
-                       }
-                       else {
-                               regexp = re->regexp;
-                       }
-                       callback_param.task = task;
-                       callback_param.regexp = regexp;
-                       callback_param.re = re;
-                       callback_param.found = FALSE;
-                       if (part->urls) {
-                               g_tree_foreach (part->urls, tree_url_callback, &callback_param);
-                       }
-                       if (part->html_urls && callback_param.found == FALSE) {
-                               g_tree_foreach (part->html_urls, tree_url_callback, &callback_param);
-                       }
-                       cur = g_list_next (cur);
+               regexp = re->regexp;
+               callback_param.task = task;
+               callback_param.regexp = regexp;
+               callback_param.re = re;
+               callback_param.found = FALSE;
+               if (task->urls) {
+                       g_tree_foreach (task->urls, tree_url_callback, &callback_param);
+               }
+               if (task->emails && callback_param.found == FALSE) {
+                       g_tree_foreach (task->emails, tree_url_callback, &callback_param);
                 }
                 if (callback_param.found == FALSE) {
                         task_cache_add (task, re, 0);
diff --git a/src/printf.c b/src/printf.c

index a8bf0fdb103ca8f7c961c988cce216b35e5b6d53..4155f947864de5b716da8a1db56223954f2a4067 100644 (file)
--- a/src/printf.c
+++ b/src/printf.c
@@ -216,6 +216,7 @@ rspamd_vsnprintf (gchar *buf, glong max, const gchar *fmt, va_list args)
         guint64             ui64;
         guint               width, sign, hex, max_width, frac_width, i;
         f_str_t                    *v;
+       GString            *gs;
  
         if (max <= 0) {
                 return buf;
@@ -312,6 +313,15 @@ rspamd_vsnprintf (gchar *buf, glong max, const gchar *fmt, va_list args)
  
                                 continue;
  
+                       case 'v':
+                               gs = va_arg (args, GString *);
+                               len = gs->len;
+                               len = (buf + len < last) ? len : (size_t) (last - buf);
+
+                               buf = ((gchar *)memcpy (buf, gs->str, len)) + len;
+                               fmt++;
+                               break;
+
                         case 's':
                                 p = va_arg(args, gchar *);
                                 if (p == NULL) {
diff --git a/src/printf.h b/src/printf.h

index 0d41bdc8099becde3c5a5366d42926f6f64c98d6..c1c6866d6cb77f75d59c2c92d44d1f957f95853e 100644 (file)
--- a/src/printf.h
+++ b/src/printf.h
@@ -44,6 +44,7 @@
   *     %r                                          rlim_t
   *     %p                                                  void *
   *     %V                                                  f_str_t *
+ *     %v                          GString *
   *     %s                                                  null-terminated string
   *     %S                                                  ascii null-terminated string
   *     %*s                                             length and string
diff --git a/src/url.c b/src/url.c

index 83492eaab02fa80933edb576ed5784e34d65e724..dbc04ffabbce19643c35617b9fdc3283b77f5242 100644 (file)
--- a/src/url.c
+++ b/src/url.c
@@ -1157,10 +1157,10 @@ url_email_end (const gchar *begin, const gchar *end, const gchar *pos, url_match
  void
  url_parse_text (memory_pool_t * pool, struct worker_task *task, struct mime_text_part *part, gboolean is_html)
  {
-       gint                            rc, off = 0;
-       gchar                          *url_str = NULL;
+       gint                            rc;
+       gchar                          *url_str = NULL, *url_start, *url_end;
         struct uri                     *new;
-       const guint8                   *p, *end;
+       gchar                          *p, *end, *begin;
  
  
         if (!part->orig->data || part->orig->len == 0) {
@@ -1170,34 +1170,37 @@ url_parse_text (memory_pool_t * pool, struct worker_task *task, struct mime_text
  
         if (url_init () == 0) {
                 if (is_html) {
-                       p = part->orig->data;
-                       end = p + part->orig->len;
+                       begin = part->orig->data;
+                       end = begin + part->orig->len;
+                       p = begin;
                 }
                 else {
-                       p = part->content->data;
-                       end = p + part->content->len;
+                       begin = part->content->data;
+                       end = begin + part->content->len;
+                       p = begin;
                 }
                 while (p < end) {
-                       if (url_try_text (pool, p, end - p, &off, &url_str)) {
-                               if (url_str != NULL &&
-                                               g_tree_lookup (is_html ? part->html_urls : part->urls, url_str) == NULL) {
+                       if (url_try_text (pool, p, end - p, &url_start, &url_end, &url_str)) {
+                               if (url_str != NULL) {
                                         new = memory_pool_alloc0 (pool, sizeof (struct uri));
                                         if (new != NULL) {
                                                 g_strstrip (url_str);
                                                 rc = parse_uri (new, url_str, pool);
                                                 if ((rc == URI_ERRNO_OK || rc == URI_ERRNO_NO_SLASHES || rc == URI_ERRNO_NO_HOST_SLASH) &&
                                                                 new->hostlen > 0) {
+                                                       new->pos = url_start - begin;
+                                                       new->len = url_end - url_start;
                                                         if (new->protocol == PROTOCOL_MAILTO) {
                                                                 if (!g_tree_lookup (task->emails, new)) {
                                                                         g_tree_insert (task->emails, new, new);
                                                                 }
                                                         }
                                                         else {
-                                                               g_tree_insert (is_html ? part->html_urls : part->urls, url_str, new);
                                                                 if (!g_tree_lookup (task->urls, new)) {
                                                                         g_tree_insert (task->urls, new, new);
                                                                 }
                                                         }
+                                                       part->urls_offset = g_list_prepend (part->urls_offset, new);
                                                 }
                                                 else if (rc != URI_ERRNO_OK) {
                                                         msg_info ("extract of url '%s' failed: %s", url_str, url_strerror (rc));
@@ -1208,13 +1211,18 @@ url_parse_text (memory_pool_t * pool, struct worker_task *task, struct mime_text
                         else {
                                 break;
                         }
-                       p += off;
+                       p = url_end + 1;
                 }
         }
+       /* Handle offsets of this part */
+       if (part->urls_offset != NULL) {
+               part->urls_offset = g_list_reverse (part->urls_offset);
+               memory_pool_add_destructor (task->task_pool, (pool_destruct_func)g_list_free, part->urls_offset);
+       }
  }
  
  gboolean
-url_try_text (memory_pool_t *pool, const gchar *begin, gsize len, gint *res, gchar **url_str)
+url_try_text (memory_pool_t *pool, const gchar *begin, gsize len, gchar **start, gchar **fin, gchar **url_str)
  {
         const gchar                    *end, *pos;
         gint                            idx, l;
@@ -1247,8 +1255,11 @@ url_try_text (memory_pool_t *pool, const gchar *begin, gsize len, gint *res, gch
                         else {
                                 *url_str = NULL;
                         }
-                       if (res) {
-                               *res = (pos - begin) + strlen (matcher->pattern);
+                       if (start != NULL) {
+                               *start = (gchar *)pos;
+                       }
+                       if (fin != NULL) {
+                               *fin = (gchar *)pos + m.m_len;
                         }
                         return TRUE;
                 }
diff --git a/src/url.h b/src/url.h

index eb11ceba3c2a5532d91f16ff2eb56d0bed147ee0..9c0812e62655bb2561adde63e5db9b40169a2010 100644 (file)
--- a/src/url.h
+++ b/src/url.h
@@ -32,6 +32,9 @@ struct uri {
  
         struct uri *phished_url;
  
+       gsize pos;
+       gsize len;
+
         /* @protocollen should only be usable if @protocol is either
          * PROTOCOL_USER or an uri string should be composed. */
         guint protocollen;
@@ -76,7 +79,7 @@ enum protocol {
  
  void url_parse_text (memory_pool_t *pool, struct worker_task *task, struct mime_text_part *part, gboolean is_html);
  enum uri_errno parse_uri(struct uri *uri, gchar *uristring, memory_pool_t *pool);
-gboolean url_try_text (memory_pool_t *pool, const gchar *begin, gsize len, gint *res, gchar **url_str);
+gboolean url_try_text (memory_pool_t *pool, const gchar *begin, gsize len, gchar **start, gchar **end, gchar **url_str);
  const gchar* url_strerror (enum uri_errno err);
  
  #endif
author	Vsevolod Stakhov <vsevolod@rambler-co.ru>
	Thu, 23 Jun 2011 15:05:58 +0000 (19:05 +0400)
committer	Vsevolod Stakhov <vsevolod@rambler-co.ru>
	Thu, 23 Jun 2011 15:05:58 +0000 (19:05 +0400)
src/expressions.c		patch \| blob \| history
src/fuzzy.c		patch \| blob \| history
src/fuzzy.h		patch \| blob \| history
src/html.c		patch \| blob \| history
src/message.c		patch \| blob \| history
src/message.h		patch \| blob \| history
src/plugins/fuzzy_check.c		patch \| blob \| history
src/plugins/regexp.c		patch \| blob \| history
src/printf.c		patch \| blob \| history
src/printf.h		patch \| blob \| history
src/url.c		patch \| blob \| history
src/url.h		patch \| blob \| history