* Fix error in expression parser that causes bad errors with expressions that have...

author Vsevolod Stakhov <vsevolod@rambler-co.ru>

Mon, 23 Mar 2009 11:10:07 +0000 (14:10 +0300)

committer Vsevolod Stakhov <vsevolod@rambler-co.ru>

Mon, 23 Mar 2009 11:10:07 +0000 (14:10 +0300)
author Vsevolod Stakhov <vsevolod@rambler-co.ru>
Mon, 23 Mar 2009 11:10:07 +0000 (14:10 +0300)
committer Vsevolod Stakhov <vsevolod@rambler-co.ru>
Mon, 23 Mar 2009 11:10:07 +0000 (14:10 +0300)
diff --git a/README.utf8.txt b/README.utf8.txt

index c5bc293c0fac6903016e2125ba8a734413c5c6d8..eed4010a20f771cd50e07f560c190f9c8aa934c7 100644 (file)
--- a/README.utf8.txt
+++ b/README.utf8.txt
@@ -1,7 +1,7 @@
  API Rspamd.
  ===========
  
-TODO.
+API rspamd описано подробно в Doxygen документации.
  
  Логика работы фильтров rspamd.
  ==============================
@@ -118,3 +118,24 @@ $subject_blah = "Subject=/blah/H";
  тогда предыдущее выражение будет таким
  
  SOME_SYMBOL = "${to_blah} & !(${from_blah} | ${subject_blah})"
+
+Логические выражения rspamd
+===========================
+
+Условия, содержащие регулярные выражения, функции, логические операции, скобки, могут использоваться
+для задания правил фильтрации. Общие правила работы:
+- логическими операциями могут быть логическое "И": '&', логическое "ИЛИ": '|' и логическое отрицание:
+'!'.
+- приоритет логических операций такой: & и | -> !, для изменения приоритета можно пользоваться скобками:
+ (A&!B) | !(C|D)
+- пробелы в выражениях игнорируются
+- операнд, содержащий /re/args или же string=/re/args считается регулярным выражением, внутри регулярного
+выражения все символы '/' и '"' должны экранироваться символом '\'. Сам '\' при этом экранировать не нужно.
+- операнд, который принимает аргументы, считается функцией, аргументом функции может являться другая функция,
+при этом порядок вызова функций-аргументов - справа налево (как это сделано в gcc)
+- в rspamd встроен ряд функций:
+  * header_exists - принимает в качестве аргумента имя хедера, возвращает true, если такой заголовок существует
+  * compare_parts_distance - принимает в качестве аргумента число от 0 до 100, которое отражает разницу в процентах
+    между частями письма. Функция работает с сообщениями, содержащими 2 текстовые части (text/plain и text/html) и
+       возвращает true тогда, когда эти части различаются более чем на n процентов. Если аргумент не указан, то
+       по умолчанию ищется различие в 100% (полностью разные части).
diff --git a/perl/Makefile.PL.in b/perl/Makefile.PL.in

index 0d70d427032681057e604dacfe00d8ae49913b3a..10ca63e1663e07805996e50f19544ff35e811fc4 100644 (file)
--- a/perl/Makefile.PL.in
+++ b/perl/Makefile.PL.in
@@ -1,7 +1,7 @@
  use ExtUtils::MakeMaker;
  WriteMakefile(
      NAME         => 'Mail::Rspamd',
-    AUTHOR       => 'Vsevolod Stakhov <vsevolod@rambler-co.ru>',
+    AUTHOR       => 'Vsevolod Stakhov <vsevolod@highsecure.ru>',
      XS           => { 'Rspamd.xs'  => 'Rspamd.c' },
      VERSION_FROM => 'Rspamd.pm', # finds $VERSION
      LIBS         => ['${GLIB_LDFLAGS} ${GMIME_LDFLAGS} -levent'],   # e.g., '-lm'
@@ -15,6 +15,7 @@ WriteMakefile(
             'Rspamd.c'   => qw{
  Rspamd/ContentType.xs            Rspamd/Part.xs                   Rspamd/Hash.xs
  Rspamd/InternetAddress.xs        Rspamd/Message.xs                Rspamd/Object.xs
+Rspamd/TextPart.xs
  },
         },
  );
diff --git a/perl/Rspamd.pod b/perl/Rspamd.pod

index d40574820191090470c8d06d5d8970a61eb94766..2af9b4965f5e7be989013e8b4357a52bcbefdd6a 100644 (file)
--- a/perl/Rspamd.pod
+++ b/perl/Rspamd.pod
@@ -197,7 +197,7 @@ E.g.:
  
  Mail::Rspamd::Header is a private structure. This structure contains
  all the headers except special ones (Content-* MIME-Version).
-Look for L<Header tied hash> for easy maintaining for header.
+Look for Header tied hash for easy maintaining for header.
  Use also the Mail::Rspamd::Message::get_header() and set_header() methods.
  
  =back
@@ -476,6 +476,10 @@ Return Mail::Rspamd::Config object.
  
  Return message's urls as array of strings.
  
+=item I<get_text_parts> ()
+
+Return message's text parts as array of Mail::Rspamd::TextPart objects.
+
  =back
  
  =head2 Mail::Rspamd::Config
@@ -493,15 +497,23 @@ Gets and sets specified parameter in config.
  =item I<get_metric> (metric)
  
  Returns hash of parameters of specified metric:
+
+=begin text
+
  {
  'name'             => name of metric
  'func_name'        => consolidation function
  'required_score'   => score for metric
  }
  
+=end text
+
  =item I<get_statfile> (statfile)
  
  Returns parameters of specified statfile:
+
+=begin text
+
  {
  'alias'         => alias of statfile
  'pattern'       => fs pattern
@@ -510,12 +522,38 @@ Returns parameters of specified statfile:
  'size'          => size of statfile
  }
  
+=end text
+
  =item I<get_module_param> (modulename, paramname)
  
  Return parameter's value for specified module.
  
  =back
  
+=head2 Mail::Rspamd::TextPart
+
+Object that represents a single text part of a message.
+
+=over 4
+
+=item I<get_content> ()
+
+Returns content of part.
+
+=item I<get_fuzzy> ()
+
+Returns fuzzy hash of part as string.
+
+=item I<compare_distance> (other)
+
+Calculate distance between two parts using their fuzzy hashes. Return value from 0 (identical) to 100 (totally different).
+
+=item I<is_html> ()
+
+Return 0 if the part is plain text and a non-zero value otherwise.
+
+=back
+
  =head1 CONSTANT VARIABLES
  
      GMIME_LENGTH_ENCODED
diff --git a/perl/Rspamd.xs b/perl/Rspamd.xs

index 9231d3e62f8c8449d7aef183718a1ae92e950f84..892e4b00665f8fdf4bd646f7ee9e565eb5de6d3b 100644 (file)
--- a/perl/Rspamd.xs
+++ b/perl/Rspamd.xs
@@ -14,6 +14,7 @@
  #include "../src/cfg_file.h"
  #include "../src/perl.h"
  #include "../src/mem_pool.h"
+#include "../src/fuzzy.h"
  
  #define XSINTERFACE_FUNC_RSPAMD_MESSAGE_SET(cv,f)      \
         CvXSUBANY(cv).any_dptr = (void (*) (pTHX_ void*))(CAT2( g_mime_message_,f ))
@@ -47,6 +48,7 @@ typedef GMimePartEncodingType Mail__Rspamd__PartEncodingType;
  typedef GMimeObject *          Mail__Rspamd__Object;
  typedef GMimeParam *           Mail__Rspamd__Param;
  typedef GMimePart *            Mail__Rspamd__Part;
+typedef struct mime_text_part * Mail__Rspamd__TextPart;
  typedef GMimeParser *          Mail__Rspamd__Parser;
  typedef GMimeMultipart *       Mail__Rspamd__MultiPart;
  typedef GMimeMessage *         Mail__Rspamd__Message;
@@ -401,5 +403,6 @@ INCLUDE: Rspamd/Message.xs
  
  INCLUDE: Rspamd/InternetAddress.xs
  INCLUDE: Rspamd/Hash.xs
+INCLUDE: Rspamd/TextPart.xs
  
  
diff --git a/perl/Rspamd/Task.xs b/perl/Rspamd/Task.xs

index 16719ef7bc12a676a7affb53d3e9a4e637d61c2d..31928bf7b0bc5cee6b4c51d13aa94740bc312cd2 100644 (file)
--- a/perl/Rspamd/Task.xs
+++ b/perl/Rspamd/Task.xs
@@ -81,3 +81,24 @@ rspamd_task_get_urls (task)
         OUTPUT:
                 RETVAL
  
+AV*
+rspamd_task_get_text_parts (task)
+               Mail::Rspamd::Task task
+       PREINIT:
+               AV* retav;
+               GList *cur;
+               SV* ps;
+       CODE:
+               /* Build an array holding one Mail::Rspamd::TextPart reference per element of task->text_parts */
+               retav = newAV ();
+               /* Advance from the current node: stepping from the list head again would never reach the end */
+               for (cur = g_list_first (task->text_parts); cur != NULL; cur = g_list_next (cur)) {
+                       ps = newSViv (0);
+                       sv_setref_pv (ps, "Mail::Rspamd::TextPart", (Mail__Rspamd__TextPart)(cur->data));
+                       av_push(retav, ps);
+               }
+
+               RETVAL = retav;
+       OUTPUT:
+               RETVAL
+
diff --git a/perl/Rspamd/TextPart.xs b/perl/Rspamd/TextPart.xs

new file mode 100644 (file)

index 0000000..485ee7b
--- /dev/null
+++ b/perl/Rspamd/TextPart.xs
@@ -0,0 +1,46 @@
+MODULE = Mail::Rspamd PACKAGE = Mail::Rspamd::TextPart PREFIX = rspamd_text_part_
+
+SV *
+rspamd_text_part_get_content (mime_part)
+               Mail::Rspamd::TextPart  mime_part
+       PREINIT:
+               SV* content;
+       CODE:
+               /* Return a read-only mortal SV that points at the part's buffer (no copy); SvLEN is set to 0 */
+               ST(0) = &PL_sv_undef;
+               content = sv_newmortal ();
+               SvUPGRADE (content, SVt_PV);
+               SvREADONLY_on (content);
+               SvPVX(content) = (char *) (mime_part->content->data);
+               SvCUR_set (content, mime_part->content->len);
+               SvLEN_set (content, 0);
+               SvPOK_only (content);
+               ST(0) = content;
+
+char *
+rspamd_text_part_get_fuzzy (mime_part)
+               Mail::Rspamd::TextPart  mime_part
+       CODE:
+               /* Return the hash_pipe buffer of the part's fuzzy hash; OUTPUT is required for RETVAL to reach perl */
+               RETVAL = mime_part->fuzzy->hash_pipe;
+       OUTPUT:
+               RETVAL
+
+int
+rspamd_text_part_compare_distance (mime_part, other)
+               Mail::Rspamd::TextPart  mime_part
+               Mail::Rspamd::TextPart  other
+       CODE:
+               /* Return the result of fuzzy_compare_hashes () for the fuzzy hashes of both parts */
+               RETVAL = fuzzy_compare_hashes (mime_part->fuzzy, other->fuzzy);
+       OUTPUT:
+               RETVAL
+
+int
+rspamd_text_part_is_html (mime_part)
+               Mail::Rspamd::TextPart  mime_part
+       CODE:
+               /* Return the is_html flag of the part */
+               RETVAL = mime_part->is_html;
+       OUTPUT:
+               RETVAL
diff --git a/perl/typemap b/perl/typemap

index fa2dabb436cad740f07182507998537133955cb9..0ae0ef7dee9854fd49779e1e9ae7027e306c3064 100644 (file)
--- a/perl/typemap
+++ b/perl/typemap
@@ -20,6 +20,7 @@ Mail::Rspamd::Object                  T_PTROBJ
  Mail::Rspamd::Param                    T_PTROBJ
  Mail::Rspamd::Message                  T_PTROBJ
  Mail::Rspamd::Part                     T_PTROBJ
+Mail::Rspamd::TextPart         T_PTROBJ
  Mail::Rspamd::ContentType                      T_PTROBJ
  Mail::Rspamd::InternetAddress          T_PTROBJ
  Mail::Rspamd::Hash::Header             T_PTROBJ
diff --git a/src/cfg_utils.c b/src/cfg_utils.c

index 1eeb518edd73aa0b04df7bea9c85aa6d1a1e51d0..037f237546cef7db8ae3b2aa3dfbe5d0e301883d 100644 (file)
--- a/src/cfg_utils.c
+++ b/src/cfg_utils.c
@@ -355,6 +355,11 @@ substitute_variable (struct config_file *cfg, char *str, u_char recursive)
         char *var, *new, *v_begin, *v_end;
         size_t len;
  
+       if (str == NULL) {
+               yywarn ("substitute_variable: trying to substitute variable in NULL string");
+               return NULL;
+       }
+
         while ((v_begin = strstr (str, "${")) != NULL) {
                 len = strlen (str);
                 *v_begin = '\0';
diff --git a/src/expressions.c b/src/expressions.c

index 5cb30e4c31f1fb252d2e0f238e030b818437183b..eefd11f781628de2e2a403c60176f4444fd220f1 100644 (file)
--- a/src/expressions.c
+++ b/src/expressions.c
@@ -26,12 +26,15 @@
  #include "util.h"
  #include "cfg_file.h"
  #include "main.h"
+#include "message.h"
+#include "fuzzy.h"
  #include "expressions.h"
  
  typedef gboolean (*rspamd_internal_func_t)(struct worker_task *, GList *args);
  
  gboolean rspamd_compare_encoding (struct worker_task *task, GList *args);
  gboolean rspamd_header_exists (struct worker_task *task, GList *args);
+gboolean rspamd_parts_distance (struct worker_task *task, GList *args);
  /*
   * List of internal functions of rspamd
   * Sorted by name to use bsearch
@@ -41,6 +44,7 @@ static struct _fl {
         rspamd_internal_func_t func;
  } rspamd_functions_list[] = {
         { "compare_encoding", rspamd_compare_encoding },
+       { "compare_parts_distance", rspamd_parts_distance },
         { "header_exists", rspamd_header_exists },
  };
  
@@ -273,7 +277,9 @@ parse_expression (memory_pool_t *pool, char *line)
  
                         case READ_REGEXP:
                                 if (*p == '/' && *(p - 1) != '\\') {
-                                       p ++;
+                                       if (*(p + 1)) {
+                                               p ++;
+                                       }
                                         state = READ_REGEXP_FLAGS;
                                 }
                                 else {
@@ -285,14 +291,17 @@ parse_expression (memory_pool_t *pool, char *line)
                                 if (!is_regexp_flag (*p) || *(p + 1) == '\0') {
                                         if (c != p) {
                                                 /* Copy operand */
-                                               str = memory_pool_alloc (pool, p - c + 3);
-                                               g_strlcpy (str, c - 1, (p - c + 3));
+                                               if (*(p + 1) == '\0') {
+                                                       p++;
+                                               }
+                                               str = memory_pool_alloc (pool, p - c + 2);
+                                               g_strlcpy (str, c - 1, (p - c + 2));
                                                 g_strstrip (str);
                                                 if (strlen (str) > 0) {
                                                         insert_expression (pool, &expr, EXPR_REGEXP, 0, str);
                                                 }
                                         }
-                                       c = ++p;
+                                       c = p;
                                         state = SKIP_SPACES;
                                 }
                                 else {
@@ -593,6 +602,53 @@ rspamd_header_exists (struct worker_task *task, GList *args)
  #endif
  }
  
+/*
+ * Expression function: compare the two text parts of a message (typically
+ * text/html and text/plain) using their fuzzy hashes.
+ * args: optional; the first element is the threshold as a decimal string in
+ * the range 0..100, a missing or invalid threshold is replaced by 100.
+ * Returns TRUE when the task has exactly two text parts and the value of
+ * fuzzy_compare_hashes () for them is >= threshold, FALSE otherwise.
+ */
+gboolean 
+rspamd_parts_distance (struct worker_task *task, GList *args)
+{
+       long threshold = 100;
+       char *end;
+       struct mime_text_part *p1, *p2;
+       GList *cur;
+
+       if (args == NULL) {
+               msg_debug ("rspamd_parts_distance: no threshold is specified, assume it 100");
+       }
+       else {
+               /* strtol () leaves errno untouched when no digits are found, so check the end pointer too */
+               errno = 0;
+               threshold = strtol ((char *)args->data, &end, 10);
+               if (errno != 0 || end == (char *)args->data || threshold < 0 || threshold > 100) {
+                       msg_info ("rspamd_parts_distance: bad numeric value for threshold \"%s\", assume it 100", (char *)args->data);
+                       threshold = 100;
+               }
+       }
+
+       if (g_list_length (task->text_parts) != 2) {
+               msg_debug ("rspamd_parts_distance: message does not have exactly two text parts, so do not try to compare them with each other");
+               return FALSE;
+       }
+
+       cur = g_list_first (task->text_parts);
+       p1 = cur->data;
+       cur = g_list_next (cur);
+       if (cur == NULL) {
+               msg_info ("rspamd_parts_distance: bad parts list");
+               return FALSE;
+       }
+       p2 = cur->data;
+
+       /* NOTE(review): assumes fuzzy_compare_hashes () grows with the difference between parts - confirm */
+       return fuzzy_compare_hashes (p1->fuzzy, p2->fuzzy) >= threshold;
+}
+
  /*
   * vi:ts=4
   */
diff --git a/src/filter.c b/src/filter.c

index 766cd16e45c5ff1576b6248027f0047c99fbdf82..1b6cdc1b02fd94d1405c445e40259ac574ac3e39 100644 (file)
--- a/src/filter.c
+++ b/src/filter.c
@@ -437,10 +437,10 @@ statfiles_callback (gpointer key, gpointer value, void *arg)
         struct classifier *classifier;
         struct statfile_result_data *res_data;
         struct metric *metric;
+       struct mime_text_part *text_part;
  
         GTree *tokens = NULL;
-       GList *cur = NULL;
-       GByteArray *content;
+       GList *cur;
  
         char *filename;
         f_str_t c;
@@ -457,10 +457,12 @@ statfiles_callback (gpointer key, gpointer value, void *arg)
                 return;
         }
         
+       cur = g_list_first (task->text_parts);
         if ((tokens = g_hash_table_lookup (data->tokens, st->tokenizer)) == NULL) {
-               while ((content = get_next_text_part (task->task_pool, task->parts, &cur)) != NULL) {
-                       c.begin = content->data;
-                       c.len = content->len;
+               while (cur != NULL) {
+                       text_part = (struct mime_text_part *)cur->data;
+                       c.begin = text_part->content->data;
+                       c.len = text_part->content->len;
                         /* Tree would be freed at task pool freeing */
                         if (!st->tokenizer->tokenize_func (st->tokenizer, task->task_pool, &c, &tokens)) {
                                 msg_info ("statfiles_callback: cannot tokenize input");
diff --git a/src/fuzzy.c b/src/fuzzy.c

index 08814eaa1d6dedc1c620496b768e654adbdcf56c..4dfec3fb7a14ad9d495fc7e3979ac8a84b6f6477 100644 (file)
--- a/src/fuzzy.c
+++ b/src/fuzzy.c
@@ -95,7 +95,7 @@ fuzzy_update (fuzzy_hash_t *h, char c)
      
         if (h->rh % h->block_size == (h->block_size - 1)) {
                 h->hash_pipe[h->hi] = h->h;
-               if (h->hi < FUZZY_HASHLEN - 1) {
+               if (h->hi < FUZZY_HASHLEN - 2) {
                         h->h = HASH_INIT;
                         h->hi ++;
                 }
@@ -249,6 +249,17 @@ fuzzy_init (f_str_t *in, memory_pool_t *pool)
         return new;
  }
  
+/* Compute a fuzzy hash for a GByteArray by passing its data pointer and length to fuzzy_init () */
+fuzzy_hash_t *
+fuzzy_init_byte_array (GByteArray *in, memory_pool_t *pool)
+{
+       f_str_t view;
+
+       view.len = in->len;
+       view.begin = in->data;
+       return fuzzy_init (&view, pool);
+}
+
  /* Compare score of difference between two hashes 0 - different hashes, 100 - identical hashes */
  int
  fuzzy_compare_hashes (fuzzy_hash_t *h1, fuzzy_hash_t *h2) 
diff --git a/src/fuzzy.h b/src/fuzzy.h

index 91e6512c6809780441e83522ae5198c6f6095c36..50d1a9110e3df7a09a84ae17a59847b6409f11d1 100644 (file)
--- a/src/fuzzy.h
+++ b/src/fuzzy.h
@@ -27,6 +27,7 @@ typedef struct fuzzy_hash_s {
   * @return fuzzy_hash object allocated in pool
   */
  fuzzy_hash_t * fuzzy_init (f_str_t *in, memory_pool_t *pool);
+fuzzy_hash_t * fuzzy_init_byte_array (GByteArray *in, memory_pool_t *pool);
  
  /**
   * Compare score of difference between two hashes 
diff --git a/src/main.h b/src/main.h

index 28eb64297e561bd101f3a672c6123225e9dbb6d2..e26ab3fdaf7c7b142bf300ba114c157e11b01cee 100644 (file)
--- a/src/main.h
+++ b/src/main.h
@@ -174,6 +174,7 @@ struct worker_task {
         int parts_count;                                                                                        /**< mime parts count                                                           */
         GMimeMessage *message;                                                                          /**< message, parsed with GMime                                         */
         GList *parts;                                                                                           /**< list of parsed parts                                                       */
+       GList *text_parts;                                                                                      /**< list of text parts                                                         */
         char *raw_headers;                                                                                      /**< list of raw headers                                                        */
         TAILQ_HEAD (uriq, uri) urls;                                                            /**< list of parsed urls                                                        */
         GHashTable *results;                                                                            /**< hash table of metric_result indexed by 
diff --git a/src/message.c b/src/message.c

index 76743f7de8140987c6a7cd179df3e20a994b67b0..807463a82d6f3812f3ad1f606310b102e8852c5f 100644 (file)
--- a/src/message.c
+++ b/src/message.c
@@ -242,6 +242,7 @@ mime_foreach_callback (GMimeObject *part, gpointer user_data)
  {
         struct worker_task *task = (struct worker_task *)user_data;
         struct mime_part *mime_part;
+       struct mime_text_part *text_part;
         GMimeContentType *type;
         GMimeDataWrapper *wrapper;
         GMimeStream *part_stream;
@@ -302,13 +303,27 @@ mime_foreach_callback (GMimeObject *part, gpointer user_data)
                                 mime_part->content = part_content;
                                 msg_debug ("mime_foreach_callback: found part with content-type: %s/%s", type->type, type->subtype);
                                 task->parts = g_list_prepend (task->parts, mime_part);
-                               if (g_mime_content_type_is_type (type, "text", "html")) {
+                               /* Now do special processing for text parts of message */
+                               if (g_mime_content_type_is_type (type, "text", "html") || g_mime_content_type_is_type (type, "text", "xhtml")) {
                                         msg_debug ("mime_foreach_callback: got urls from text/html part");
                                         url_parse_html (task, part_content);
+
+                                       text_part = memory_pool_alloc (task->task_pool, sizeof (struct mime_text_part));
+                                       text_part->content = strip_html_tags (part_content, NULL);
+                                       text_part->is_html = TRUE;
+                                       text_part->fuzzy = fuzzy_init_byte_array (text_part->content, task->task_pool);
+                                       memory_pool_add_destructor (task->task_pool, (pool_destruct_func)free_byte_array_callback, text_part->content);
+                                       task->text_parts = g_list_prepend (task->text_parts, text_part);
                                 } 
                                 else if (g_mime_content_type_is_type (type, "text", "plain")) {
-                                       url_parse_text (task, part_content);
                                         msg_debug ("mime_foreach_callback: got urls from text/plain part");
+                                       url_parse_text (task, part_content);
+
+                                       text_part = memory_pool_alloc (task->task_pool, sizeof (struct mime_text_part));
+                                       text_part->content = part_content;
+                                       text_part->is_html = FALSE;
+                                       text_part->fuzzy = fuzzy_init_byte_array (text_part->content, task->task_pool);
+                                       task->text_parts = g_list_prepend (task->text_parts, text_part);
                                 }
                         }
                         else {
diff --git a/src/message.h b/src/message.h

index 1122e7e3bfb98309fbb656a934ef099ab68d628e..eaf9f493e39e55fffdea780978fd1b61b62c2d25 100644 (file)
--- a/src/message.h
+++ b/src/message.h
@@ -7,11 +7,17 @@
  #define RSPAMD_MESSAGE_H
  
  #include "config.h"
+#include "fuzzy.h"
  
  struct mime_part {
         GMimeContentType *type;
         GByteArray *content;
-       TAILQ_ENTRY (mime_part) next;
+};
+
+struct mime_text_part {                /* one text part of a message, stored in task->text_parts */
+       gboolean is_html;               /* TRUE for text/html and text/xhtml parts, FALSE for text/plain */
+       GByteArray *content;            /* part text; for html parts this is the output of strip_html_tags () */
+       fuzzy_hash_t *fuzzy;            /* fuzzy hash computed from content */
  };
  
  /**
diff --git a/test/rspamd_fuzzy_test.c b/test/rspamd_fuzzy_test.c

index d737a9171d4a7a7278f71b6a712e42e5803d6762..9feeb4500381fccabdbd8d61b576d16bb9f90628 100644 (file)
--- a/test/rspamd_fuzzy_test.c
+++ b/test/rspamd_fuzzy_test.c
@@ -21,24 +21,56 @@ static char *s2 = "This is sample test text.\r\n"
                                   "abcdefghijklmnopqrstuvwx.\r\n"
                                   "abcdefghijklmnopqrstuvwx.\r\n"
                                   "abcdefghijklmnopqrstuvwx.\r\n";
+static char *s3 = "";
+static char *s4 = "abcdefghijklmn\r\n";
+static char *s5 = "This is sample test text.\r\n"
+                                 "abcdefghijklmnopqrstuvwx.\r\n"
+                                 "abcdefghijklmnopzrstuvwx.\r\n"
+                                 "abcdefghijklmnopqrstuvwx.\r\n"
+                                 "abcdefghijklmnopqrstuvwx.\r\n"
+                                 "abcdefghijklmnopqrstuvwx.\r\n"
+                                 "abcdefghijklmnopqrstuvwx.\r\n"
+                                 "abcdefghijklmnopqrstuvwx.\r\n"
+                                 "abcdefghijklmnopqrstuvwx.\r\n";
  
  void 
  rspamd_fuzzy_test_func ()
  {
         memory_pool_t *pool;
-       fuzzy_hash_t *h1, *h2;
-       f_str_t f1, f2;
+       fuzzy_hash_t *h1, *h2, *h3, *h4, *h5;
+       f_str_t f1, f2, f3, f4, f5;
+       int diff1, diff2;
  
         pool = memory_pool_new (1024);
         f1.begin = s1;
         f1.len = strlen (s1);
         f2.begin = s2;
         f2.len = strlen (s2);
+       f3.begin = s3;
+       f3.len = strlen (s3);
+       f4.begin = s4;
+       f4.len = strlen (s4);
+       f5.begin = s5;
+       f5.len = strlen (s5);
  
         h1 = fuzzy_init (&f1, pool);
         h2 = fuzzy_init (&f2, pool);
+       h3 = fuzzy_init (&f3, pool);
+       h4 = fuzzy_init (&f4, pool);
+       h5 = fuzzy_init (&f5, pool);
  
-       msg_info ("rspamd_fuzzy_test_func: difference between strings is %d", fuzzy_compare_hashes (h1, h2));
+       diff1 = fuzzy_compare_hashes (h3, h4) + fuzzy_compare_hashes (h2, h4);
+       diff2 = fuzzy_compare_hashes (h2, h5);
+       msg_debug ("rspamd_fuzzy_test_func: s1, s2 difference between strings is %d", fuzzy_compare_hashes (h1, h2));
+       msg_debug ("rspamd_fuzzy_test_func: s1, s3 difference between strings is %d", fuzzy_compare_hashes (h1, h3));
+       msg_debug ("rspamd_fuzzy_test_func: s3, s4 difference between strings is %d", fuzzy_compare_hashes (h3, h4));
+       msg_debug ("rspamd_fuzzy_test_func: s2, s4 difference between strings is %d", fuzzy_compare_hashes (h2, h4));
+       msg_debug ("rspamd_fuzzy_test_func: s2, s5 difference between strings is %d", diff2);
+       
+       /* Identical strings */
+       g_assert (diff2 == 0);
+       /* Totally different strings */
+       g_assert (diff1 == 200);
  
         memory_pool_delete (pool);
  }
author	Vsevolod Stakhov <vsevolod@rambler-co.ru>
	Mon, 23 Mar 2009 11:10:07 +0000 (14:10 +0300)
committer	Vsevolod Stakhov <vsevolod@rambler-co.ru>
	Mon, 23 Mar 2009 11:10:07 +0000 (14:10 +0300)
README.utf8.txt		patch \| blob \| history
perl/Makefile.PL.in		patch \| blob \| history
perl/Rspamd.pod		patch \| blob \| history
perl/Rspamd.xs		patch \| blob \| history
perl/Rspamd/Task.xs		patch \| blob \| history
perl/Rspamd/TextPart.xs	[new file with mode: 0644]	patch \| blob
perl/typemap		patch \| blob \| history
src/cfg_utils.c		patch \| blob \| history
src/expressions.c		patch \| blob \| history
src/filter.c		patch \| blob \| history
src/fuzzy.c		patch \| blob \| history
src/fuzzy.h		patch \| blob \| history
src/main.h		patch \| blob \| history
src/message.c		patch \| blob \| history
src/message.h		patch \| blob \| history
test/rspamd_fuzzy_test.c		patch \| blob \| history