* Welcome 0.4.0

author Vsevolod Stakhov <vsevolod@rambler-co.ru>

Fri, 24 Jun 2011 16:25:54 +0000 (20:25 +0400)

committer Vsevolod Stakhov <vsevolod@rambler-co.ru>

Fri, 24 Jun 2011 16:25:54 +0000 (20:25 +0400)
author Vsevolod Stakhov <vsevolod@rambler-co.ru>
Fri, 24 Jun 2011 16:25:54 +0000 (20:25 +0400)
committer Vsevolod Stakhov <vsevolod@rambler-co.ru>
Fri, 24 Jun 2011 16:25:54 +0000 (20:25 +0400)
diff --git a/CMakeLists.txt b/CMakeLists.txt

index 27b088160f4d75718ceaa2aa1a4aaa69e8fb7a43..1cffe6953732cc0c6bdd0e7244c9e2589e5d0fa0 100644 (file)
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -6,8 +6,8 @@
  PROJECT(rspamd C)
  
  SET(RSPAMD_VERSION_MAJOR 0)
-SET(RSPAMD_VERSION_MINOR 3)
-SET(RSPAMD_VERSION_PATCH 14)
+SET(RSPAMD_VERSION_MINOR 4)
+SET(RSPAMD_VERSION_PATCH 0)
  
  
  SET(RSPAMD_VERSION         "${RSPAMD_VERSION_MAJOR}.${RSPAMD_VERSION_MINOR}.${RSPAMD_VERSION_PATCH}")
diff --git a/src/binlog.c b/src/binlog.c

index 76cfa0ccac812a7306d6ff92c293efb2b14866cb..2a1eb9fcb719b9160202dbd13599eafa7e2efa53 100644 (file)
--- a/src/binlog.c
+++ b/src/binlog.c
@@ -120,7 +120,7 @@ binlog_check_file (struct rspamd_binlog *log)
                 return FALSE;
         }
  
-       log->cur_seq = log->cur_idx->last_index;
+       log->cur_seq = log->metaindex->last_index * BINLOG_IDX_LEN + log->cur_idx->last_index;
         log->cur_time = log->cur_idx->indexes[log->cur_idx->last_index].time;
  
         return TRUE;
@@ -425,6 +425,13 @@ binlog_sync (struct rspamd_binlog *log, guint64 from_rev, guint64 *from_time, GB
                 *rep = NULL;
                 return FALSE;
         }
+       else if (from_rev > log->cur_seq) {
+               /* Slave has more actual copy, write this to log and abort sync */
+               msg_warn ("slave has more recent revision of statfile %s: %uL and our is: %uL", log->filename, from_rev, log->cur_seq);
+               *rep = NULL;
+               *from_time = 0;
+               return FALSE;
+       }
  
         metaindex_num = from_rev / BINLOG_IDX_LEN;
         /* First of all try to find this revision */
diff --git a/src/cfg_file.h b/src/cfg_file.h

index 9e10cd8fc32f2dc61dc3da5bf3c75d98b119a643..268906d40216c3af50801974591aa104e9bb696d 100644 (file)
--- a/src/cfg_file.h
+++ b/src/cfg_file.h
@@ -270,6 +270,8 @@ struct config_file {
         gboolean log_extended;                                                  /**< log extended information                                                   */
  
         gsize max_statfile_size;                                                /**< maximum size for statfile                                                  */
+       guint32 statfile_sync_interval;                                 /**< synchronization interval                                                   */
+       guint32 statfile_sync_timeout;                                  /**< synchronization timeout                                                    */
  
         struct memcached_server memcached_servers[MAX_MEMCACHED_SERVERS];       /**< memcached servers                          */
         gsize memcached_servers_num;                                    /**< number of memcached servers                                                */
diff --git a/src/cfg_utils.c b/src/cfg_utils.c

index 99a4bc3d91c219d5d2f77f7c3c1d8ed9313f9363..720d931ef0c23f3f2df08445aa9f487bc9518080 100644 (file)
--- a/src/cfg_utils.c
+++ b/src/cfg_utils.c
@@ -172,6 +172,9 @@ init_defaults (struct config_file *cfg)
         cfg->dns_throttling_errors = 20;
         cfg->dns_throttling_time = 10000;
  
+       cfg->statfile_sync_interval = 60000;
+       cfg->statfile_sync_timeout = 20000;
+
         cfg->max_statfile_size = DEFAULT_STATFILE_SIZE;
         cfg->modules_opts = g_hash_table_new (g_str_hash, g_str_equal);
         cfg->variables = g_hash_table_new (g_str_hash, g_str_equal);
diff --git a/src/cfg_xml.c b/src/cfg_xml.c

index fba925cc6f36640d914d28c7f8dae43d72eb1000..09f6574a00b2f86371c21d39fbfce10430f9ec41 100644 (file)
--- a/src/cfg_xml.c
+++ b/src/cfg_xml.c
@@ -281,6 +281,18 @@ static struct xml_parser_rule grammar[] = {
                                 G_STRUCT_OFFSET (struct config_file, filters_str),
                                 NULL
                         },
+                       {
+                               "sync_interval",
+                               xml_handle_seconds,
+                               G_STRUCT_OFFSET (struct config_file, statfile_sync_interval),
+                               NULL
+                       },
+                       {
+                               "sync_timeout",
+                               xml_handle_seconds,
+                               G_STRUCT_OFFSET (struct config_file, statfile_sync_timeout),
+                               NULL
+                       },
                         NULL_ATTR
                 },
                 NULL_DEF_ATTR
diff --git a/src/classifiers/bayes.c b/src/classifiers/bayes.c

index b4f7826e5b2870c4d2ebd6813dacb94be554564a..dadd33e5ee08e424bc988dcb1653628ccd266f04 100644 (file)
--- a/src/classifiers/bayes.c
+++ b/src/classifiers/bayes.c
@@ -43,11 +43,11 @@ bayes_error_quark (void)
  }
  
  struct bayes_statfile_data {
-       double                          hits;
-       double                          total_hits;
+       guint64                         hits;
+       guint64                         total_hits;
         double                          local_probability;
         double                          post_probability;
-       double                          value;
+       guint                           value;
         struct statfile                *st;
         stat_file_t                    *file;
  };
@@ -67,25 +67,22 @@ bayes_learn_callback (gpointer key, gpointer value, gpointer data)
  {
         token_node_t                   *node = key;
         struct bayes_callback_data     *cd = data;
-       double                          v, c;
+       gint                            v, c;
  
         c = (cd->in_class) ? 1 : -1;
  
         /* Consider that not found blocks have value 1 */
         v = statfile_pool_get_block (cd->pool, cd->file, node->h1, node->h2, cd->now);
-       if (fabs (v) < ALPHA && c > 0) {
+       if (v == 0 && c > 0) {
                 statfile_pool_set_block (cd->pool, cd->file, node->h1, node->h2, cd->now, c);
         }
         else {
-               if (G_LIKELY (c > 0 && c < G_MAXDOUBLE)) {
-                       v += c;
+               if (G_LIKELY (c > 0)) {
+                       v ++;
                 }
                 else if (c < 0){
-                       if (v > -c) {
-                               v -= c;
-                       }
-                       else {
-                               v = 0;
+                       if (v != 0) {
+                               v --;
                         }
                 }
                 statfile_pool_set_block (cd->pool, cd->file, node->h1, node->h2, cd->now, v);
@@ -103,14 +100,15 @@ bayes_classify_callback (gpointer key, gpointer value, gpointer data)
  
         token_node_t                   *node = key;
         struct bayes_callback_data     *cd = data;
-       double                          local_hits = 0, renorm = 0;
-       int                             i;
+       double                          renorm = 0;
+       gint                            i;
+       guint64                         local_hits = 0;
         struct bayes_statfile_data     *cur;
  
         for (i = 0; i < cd->statfiles_num; i ++) {
                 cur = &cd->statfiles[i];
                 cur->value = statfile_pool_get_block (cd->pool, cur->file, node->h1, node->h2, cd->now);
-               if (cur->value > ALPHA) {
+               if (cur->value > 0) {
                         cur->total_hits += cur->value;
                         cur->hits = cur->value;
                         local_hits += cur->value;
@@ -121,7 +119,8 @@ bayes_classify_callback (gpointer key, gpointer value, gpointer data)
         }
         for (i = 0; i < cd->statfiles_num; i ++) {
                 cur = &cd->statfiles[i];
-               cur->local_probability = 0.5 + (cur->value - (local_hits - cur->value)) / (LOCAL_PROB_DENOM * (local_hits + 1.0));
+               cur->local_probability = 0.5 + ((double)cur->value - ((double)local_hits - cur->value)) /
+                               (LOCAL_PROB_DENOM * (1.0 + local_hits));
                 renorm += cur->post_probability * cur->local_probability;
         }
  
diff --git a/src/controller.c b/src/controller.c

index f899518fe181e7a60aa77b010567dbaa1cee89a7..e7e4c10d86b020b01ea943073ad0919d59dddfe4 100644 (file)
--- a/src/controller.c
+++ b/src/controller.c
@@ -212,7 +212,7 @@ write_whole_statfile (struct controller_session *session, gchar *symbol, struct
         struct statfile                *st;
         gchar                           out_buf[BUFSIZ];
         gint                            i;
-       guint64                         rev, ti, len, pos;
+       guint64                         rev, ti, len, pos, blocks;
         gchar                           *out;
         struct rspamd_binlog_element    log_elt;
         struct stat_file_block         *stat_elt;
@@ -222,7 +222,7 @@ write_whole_statfile (struct controller_session *session, gchar *symbol, struct
         if (statfile == NULL) {
                 return FALSE;
         }
-       
+
         /* Begin to copy all blocks into array */
         statfile_get_revision (statfile, &rev, (time_t *)&ti);
         if (ti == 0) {
@@ -230,10 +230,13 @@ write_whole_statfile (struct controller_session *session, gchar *symbol, struct
                 ti = time (NULL);
                 statfile_set_revision (statfile, rev, ti);
         }
-       len = statfile->cur_section.length * sizeof (struct rspamd_binlog_element);
+       msg_info ("send a whole statfile %s with version %uL to slave", symbol, rev);
+
+       blocks = statfile_get_total_blocks (statfile);
+       len = blocks * sizeof (struct rspamd_binlog_element);
         out = memory_pool_alloc (session->session_pool, len);
  
-       for (i = 0, pos = 0; i < statfile->cur_section.length; i ++) {
+       for (i = 0, pos = 0; i < blocks; i ++) {
                 stat_elt = (struct stat_file_block *)((u_char *)statfile->map + statfile->seek_pos + i * sizeof (struct stat_file_block));
                 if (fabs (stat_elt->value) > 0.001) {
                         /* Write only those values which value is not 0 */
@@ -324,6 +327,7 @@ process_sync_command (struct controller_session *session, gchar **args)
         }
         
         while (binlog_sync (binlog, rev, &time, &data)) {
+               rev ++;
                 r = rspamd_snprintf (out_buf, sizeof (out_buf), "%uL %uL %z" CRLF, rev, time, data->len);
                 if (! rspamd_dispatcher_write (session->dispatcher, out_buf, r, FALSE, FALSE)) {
                         if (data != NULL) {
@@ -339,7 +343,6 @@ process_sync_command (struct controller_session *session, gchar **args)
                                 return FALSE;
                         }
                 }
-               rev ++;
         }
  
         if (time == 0) {
@@ -666,12 +669,6 @@ process_command (struct controller_command *cmd, gchar **cmd_args, struct contro
                         }
                         return TRUE;
                 }
-               else {
-                       if (! rspamd_dispatcher_write (session->dispatcher, CRLF, sizeof (CRLF) - 1, FALSE, TRUE)) {
-                               return FALSE;
-                       }
-                       return TRUE;
-               }
                 break;
         case COMMAND_HELP:
                 r = rspamd_snprintf (out_buf, sizeof (out_buf),
@@ -851,7 +848,7 @@ controller_read_socket (f_str_t * in, void *arg)
                         c.begin = part->content->data;
                         c.len = part->content->len;
                         if (!session->learn_classifier->tokenizer->tokenize_func (session->learn_classifier->tokenizer,
-                                       session->session_pool, &c, &tokens, FALSE, part->is_utf)) {
+                                       session->session_pool, &c, &tokens, FALSE, part->is_utf, part->urls_offset)) {
                                 i = rspamd_snprintf (out_buf, sizeof (out_buf), "weights failed, tokenizer error" CRLF END);
                                 free_task (task, FALSE);
                                 if (!rspamd_dispatcher_write (session->dispatcher, out_buf, i, FALSE, FALSE)) {
diff --git a/src/filter.c b/src/filter.c

index 2c094fda8f21a93ad30e8409216154754a195610..797b4f6fe4f55ca97090233aaaa237f0c6969712 100644 (file)
--- a/src/filter.c
+++ b/src/filter.c
@@ -612,7 +612,7 @@ classifiers_callback (gpointer value, void *arg)
                                 c.len = strlen (cur->data);
                                 if (c.len > 0) {
                                         c.begin = cur->data;
-                                       if (!cl->tokenizer->tokenize_func (cl->tokenizer, task->task_pool, &c, &tokens, FALSE, FALSE)) {
+                                       if (!cl->tokenizer->tokenize_func (cl->tokenizer, task->task_pool, &c, &tokens, FALSE, FALSE, NULL)) {
                                                 msg_info ("cannot tokenize input");
                                                 return;
                                         }
@@ -627,7 +627,7 @@ classifiers_callback (gpointer value, void *arg)
                                 c.begin = text_part->content->data;
                                 c.len = text_part->content->len;
                                 /* Tree would be freed at task pool freeing */
-                               if (!cl->tokenizer->tokenize_func (cl->tokenizer, task->task_pool, &c, &tokens, FALSE, text_part->is_utf)) {
+                               if (!cl->tokenizer->tokenize_func (cl->tokenizer, task->task_pool, &c, &tokens, FALSE, text_part->is_utf, text_part->urls_offset)) {
                                         msg_info ("cannot tokenize input");
                                         return;
                                 }
@@ -805,7 +805,7 @@ check_metric_action (double score, double required_score, struct metric *metric)
  gboolean
  learn_task (const gchar *statfile, struct worker_task *task, GError **err)
  {
-       GList                          *cur;
+       GList                          *cur, *ex;
         struct classifier_config       *cl;
         struct classifier_ctx          *cls_ctx;
         gchar                          *s;
@@ -841,6 +841,7 @@ learn_task (const gchar *statfile, struct worker_task *task, GError **err)
                 if (s != NULL) {
                         c.len = strlen (cur->data);
                         c.begin = cur->data;
+                       ex = NULL;
                 }
                 else {
                         part = cur->data;
@@ -852,11 +853,12 @@ learn_task (const gchar *statfile, struct worker_task *task, GError **err)
                         c.begin = part->content->data;
                         c.len = part->content->len;
                         is_utf = part->is_utf;
+                       ex = part->urls_offset;
                 }
                 /* Get tokens */
                 if (!cl->tokenizer->tokenize_func (
                                 cl->tokenizer, task->task_pool,
-                               &c, &tokens, FALSE, is_utf)) {
+                               &c, &tokens, FALSE, is_utf, ex)) {
                         g_set_error (err, filter_error_quark(), 2, "Cannot tokenize message");
                         return FALSE;
                 }
diff --git a/src/fstring.c b/src/fstring.c

index 5fcb12bd2a2e0a84d0303ed0ec6f3ed8971b3dcd..84c8c54bd3fc2aebf3c99aecf1bab61d0a7c7b4c 100644 (file)
--- a/src/fstring.c
+++ b/src/fstring.c
@@ -297,6 +297,34 @@ fstrgrow (memory_pool_t * pool, f_str_t * orig, size_t newlen)
         return res;
  }
  
+static guint32
+fstrhash_c (gchar c, guint32 hval)
+{
+       guint32                         tmp;
+       /*
+        * xor in the current byte against each byte of hval
+        * (which alone gaurantees that every bit of input will have
+        * an effect on the output)
+        */
+       tmp = c & 0xFF;
+       tmp = tmp | (tmp << 8) | (tmp << 16) | (tmp << 24);
+       hval ^= tmp;
+
+       /* add some bits out of the middle as low order bits */
+       hval = hval + ((hval >> 12) & 0x0000ffff);
+
+       /* swap most and min significative bytes */
+       tmp = (hval << 24) | ((hval >> 24) & 0xff);
+       /* zero most and min significative bytes of hval */
+       hval &= 0x00ffff00;
+       hval |= tmp;
+       /*
+        * rotate hval 3 bits to the left (thereby making the
+        * 3rd msb of the above mess the hsb of the output hash)
+        */
+       return (hval << 3) + (hval >> 29);
+}
+
  /*
   * Return hash value for a string
   */
@@ -305,7 +333,6 @@ fstrhash (f_str_t * str)
  {
         size_t                          i;
         guint32                         hval;
-       guint32                         tmp;
         gchar                           *c = str->begin;
  
         if (str == NULL) {
@@ -314,32 +341,54 @@ fstrhash (f_str_t * str)
         hval = str->len;
  
         for (i = 0; i < str->len; i++, c++) {
-               /* 
-                * xor in the current byte against each byte of hval
-                * (which alone gaurantees that every bit of input will have
-                * an effect on the output)
-                */
-               tmp = *c & 0xFF;
-               tmp = tmp | (tmp << 8) | (tmp << 16) | (tmp << 24);
-               hval ^= tmp;
-
-               /* add some bits out of the middle as low order bits */
-               hval = hval + ((hval >> 12) & 0x0000ffff);
-
-               /* swap most and min significative bytes */
-               tmp = (hval << 24) | ((hval >> 24) & 0xff);
-               /* zero most and min significative bytes of hval */
-               hval &= 0x00ffff00;
-               hval |= tmp;
-               /*
-                * rotate hval 3 bits to the left (thereby making the
-                * 3rd msb of the above mess the hsb of the output hash)
-                */
-               hval = (hval << 3) + (hval >> 29);
+               hval = fstrhash_c (*c, hval);
         }
         return hval;
  }
  
+/*
+ * Return hash value for a string
+ */
+guint32
+fstrhash_lowercase (f_str_t * str, gboolean is_utf)
+{
+       gsize                           i;
+       guint32                         j, hval;
+       const gchar                    *p = str->begin, *end = NULL;
+       gchar                           t;
+       gunichar                        uc;
+
+       if (str == NULL) {
+               return 0;
+       }
+       hval = str->len;
+
+       if (is_utf) {
+               while (end < str->begin + str->len) {
+                       g_utf8_validate (p, str->len, &end);
+                       while (p < end) {
+                               uc = g_unichar_tolower (g_utf8_get_char (p));
+                               for (j = 0; j < sizeof (gunichar); j ++) {
+                                       t = (uc >> (j * 8)) & 0xff;
+                                       if (t != 0) {
+                                               hval = fstrhash_c (t, hval);
+                                       }
+                               }
+                               p = g_utf8_next_char (p);
+                       }
+                       p = end + 1;
+               }
+
+       }
+       else {
+               for (i = 0; i < str->len; i++, p++) {
+                       hval = fstrhash_c (g_ascii_tolower (*p), hval);
+               }
+       }
+
+       return hval;
+}
+
  void
  fstrstrip (f_str_t * str)
  {
diff --git a/src/fstring.h b/src/fstring.h

index 616287fe493bda7869b4227b715245aa723b5470..eb32dc2858221b0581123aa39dc3c02ba91d1fff 100644 (file)
--- a/src/fstring.h
+++ b/src/fstring.h
@@ -93,6 +93,10 @@ f_str_t* fstrgrow (memory_pool_t *pool, f_str_t *orig, size_t newlen);
   */
  guint32 fstrhash (f_str_t *str);
  
+/*
+ * Return fast hash value for fixed string converted to lowercase
+ */
+guint32 fstrhash_lowercase (f_str_t *str, gboolean is_utf);
  /*
   * Make copy of string to 0-terminated string
   */
diff --git a/src/fuzzy.c b/src/fuzzy.c

index 1a9337071149af9c4736ebe06bf9b67e62b8135b..0a24494e2ca748191f4265e4e5a2643423d063d0 100644 (file)
--- a/src/fuzzy.c
+++ b/src/fuzzy.c
@@ -320,11 +320,11 @@ fuzzy_init_part (struct mime_text_part *part, memory_pool_t *pool)
         gchar                          *c;
         gsize                           real_len = 0, len = part->content->len;
         GList                          *cur_offset;
-       struct uri                     *cur_url = NULL;
+       struct process_exception       *cur_ex = NULL;
  
         cur_offset = part->urls_offset;
         if (cur_offset != NULL) {
-               cur_url = cur_offset->data;
+               cur_ex = cur_offset->data;
         }
  
         c = part->content->data;
@@ -332,12 +332,12 @@ fuzzy_init_part (struct mime_text_part *part, memory_pool_t *pool)
         new2 = memory_pool_alloc0 (pool, sizeof (fuzzy_hash_t));
         bzero (&rs, sizeof (rs));
         for (i = 0; i < len;) {
-               if (cur_url != NULL && cur_url->pos == i) {
-                       i += cur_url->len + 1;
-                       c += cur_url->len + 1;
+               if (cur_ex != NULL && cur_ex->pos == i) {
+                       i += cur_ex->len + 1;
+                       c += cur_ex->len + 1;
                         cur_offset = g_list_next (cur_offset);
                         if (cur_offset != NULL) {
-                               cur_url = cur_offset->data;
+                               cur_ex = cur_offset->data;
                         }
                 }
                 else {
@@ -354,18 +354,18 @@ fuzzy_init_part (struct mime_text_part *part, memory_pool_t *pool)
  
         cur_offset = part->urls_offset;
         if (cur_offset != NULL) {
-               cur_url = cur_offset->data;
+               cur_ex = cur_offset->data;
         }
  
         c = part->content->data;
  
         for (i = 0; i < len;) {
-               if (cur_url != NULL && cur_url->pos == i) {
-                       i += cur_url->len + 1;
-                       c += cur_url->len + 1;
+               if (cur_ex != NULL && cur_ex->pos == i) {
+                       i += cur_ex->len + 1;
+                       c += cur_ex->len + 1;
                         cur_offset = g_list_next (cur_offset);
                         if (cur_offset != NULL) {
-                               cur_url = cur_offset->data;
+                               cur_ex = cur_offset->data;
                         }
                 }
                 else {
diff --git a/src/main.h b/src/main.h

index d8f90b03f903b553a26a3b7945afa7ff99813b76..9a3335d0ab47309c65426a65a1046c7587bafd40 100644 (file)
--- a/src/main.h
+++ b/src/main.h
@@ -113,6 +113,13 @@ struct save_point {
         guint saved;                                                                                    /**< how much time we have delayed processing           */
  };
  
+/**
+ * Structure to point exception in text from processing
+ */
+struct process_exception {
+       gsize pos;
+       gsize len;
+};
  
  /**
   * Union that would be used for storing sockaddrs
diff --git a/src/mem_pool.c b/src/mem_pool.c

index 5c48b55eef92750aced920fd168f7380a734c34d..43ffc86a0aa37cfe88720819ef4937bde92075fd 100644 (file)
--- a/src/mem_pool.c
+++ b/src/mem_pool.c
@@ -91,6 +91,7 @@ pool_chain_new (gsize size)
         chain->pos = align_ptr (chain->begin, MEM_ALIGNMENT);
         chain->next = NULL;
         STAT_LOCK ();
+       mem_pool_stat->bytes_allocated += size;
         mem_pool_stat->chunks_allocated++;
         STAT_UNLOCK ();
  
@@ -135,6 +136,7 @@ pool_chain_new_shared (gsize size)
         chain->next = NULL;
         STAT_LOCK ();
         mem_pool_stat->shared_chunks_allocated++;
+       mem_pool_stat->bytes_allocated += size;
         STAT_UNLOCK ();
  
         return chain;
@@ -225,16 +227,11 @@ memory_pool_alloc (memory_pool_t * pool, gsize size)
                         cur->next = new;
                         pool->cur_pool = new;
                         new->pos += size;
-                       STAT_LOCK ();
-                       mem_pool_stat->bytes_allocated += size;
-                       STAT_UNLOCK ();
+
                         return new->begin;
                 }
                 tmp = align_ptr (cur->pos, MEM_ALIGNMENT);
                 cur->pos = tmp + size;
-               STAT_LOCK ();
-               mem_pool_stat->bytes_allocated += size;
-               STAT_UNLOCK ();
                 return tmp;
         }
         return NULL;
@@ -349,9 +346,6 @@ memory_pool_alloc_shared (memory_pool_t * pool, gsize size)
                 }
                 tmp = align_ptr (cur->pos, MEM_ALIGNMENT);
                 cur->pos = tmp + size;
-               STAT_LOCK ();
-               mem_pool_stat->bytes_allocated += size;
-               STAT_UNLOCK ();
                 return tmp;
         }
         return NULL;
@@ -506,20 +500,22 @@ memory_pool_delete (memory_pool_t * pool)
         while (cur) {
                 tmp = cur;
                 cur = cur->next;
-               g_slice_free1 (tmp->len, tmp->begin);
-               g_slice_free (struct _pool_chain, tmp);
                 STAT_LOCK ();
                 mem_pool_stat->chunks_freed++;
+               mem_pool_stat->bytes_allocated -= tmp->len;
                 STAT_UNLOCK ();
+               g_slice_free1 (tmp->len, tmp->begin);
+               g_slice_free (struct _pool_chain, tmp);
         }
         /* Unmap shared memory */
         while (cur_shared) {
                 tmp_shared = cur_shared;
                 cur_shared = cur_shared->next;
-               munmap ((void *)tmp_shared, tmp_shared->len + sizeof (struct _pool_chain_shared));
                 STAT_LOCK ();
                 mem_pool_stat->chunks_freed++;
+               mem_pool_stat->bytes_allocated -= tmp->len;
                 STAT_UNLOCK ();
+               munmap ((void *)tmp_shared, tmp_shared->len + sizeof (struct _pool_chain_shared));
         }
         if (pool->variables) {
                 g_hash_table_destroy (pool->variables);
diff --git a/src/message.c b/src/message.c

index 0586be8d7305e7064e31742e8915669cf9d6f3e2..4db4bef7df4d7c716da5f6081d95014c5c3a21b9 100644 (file)
--- a/src/message.c
+++ b/src/message.c
@@ -686,25 +686,6 @@ free_byte_array_callback (void *pointer)
         g_byte_array_free (arr, TRUE);
  }
  
-static void
-detect_real_charset (struct worker_task *task, GByteArray * part_content, struct mime_text_part *text_part)
-{
-       /* First of all try to detect UTF symbols */
-       text_part->is_utf = FALSE;
-       /* At first decision try to validate a single character */
-       if (g_utf8_get_char_validated (part_content->data, part_content->len) != -1) {
-               /* Now validate the whole part */
-               if (g_utf8_validate (part_content->data, part_content->len, NULL)) {
-                       text_part->is_utf = TRUE;
-                       text_part->real_charset = UTF8_CHARSET;
-                       return;
-               }
-       }
-       
-       /* Now try to detect specific symbols from some charsets */
-       
-}
-
  static GByteArray              *
  convert_text_to_utf (struct worker_task *task, GByteArray * part_content, GMimeContentType * type, struct mime_text_part *text_part)
  {
@@ -726,6 +707,7 @@ convert_text_to_utf (struct worker_task *task, GByteArray * part_content, GMimeC
  
         if (g_ascii_strcasecmp (charset, "utf-8") == 0 || g_ascii_strcasecmp (charset, "utf8") == 0) {
                 text_part->is_raw = FALSE;
+               text_part->is_utf = TRUE;
                 return part_content;
         }
  
@@ -741,6 +723,7 @@ convert_text_to_utf (struct worker_task *task, GByteArray * part_content, GMimeC
         result_array->len = write_bytes;
         memory_pool_add_destructor (task->task_pool, (pool_destruct_func) g_free, res_str);
         text_part->is_raw = FALSE;
+       text_part->is_utf = TRUE;
  
         return result_array;
  }
diff --git a/src/statfile.c b/src/statfile.c

index 9d21b5adff12ff60cdd8b299bd0d0598abe00df4..0359c0c4d2d2e6982bcf20494a4ad23463cedb62 100644 (file)
--- a/src/statfile.c
+++ b/src/statfile.c
@@ -580,7 +580,6 @@ statfile_pool_get_block (statfile_pool_t * pool, stat_file_t * file, guint32 h1,
         return 0;
  }
  
-#define RANDOM_EXPIRE G_MAXINT / CHAIN_LENGTH
  static void
  statfile_pool_set_block_common (statfile_pool_t * pool, stat_file_t * file, guint32 h1, guint32 h2, time_t t, double value, gboolean from_now)
  {
@@ -590,7 +589,6 @@ statfile_pool_set_block_common (statfile_pool_t * pool, stat_file_t * file, guin
         u_char                         *c;
      double                          min = G_MAXDOUBLE;
  
-
         if (from_now) {
                 file->access_time = t;
         }
@@ -626,14 +624,10 @@ statfile_pool_set_block_common (statfile_pool_t * pool, stat_file_t * file, guin
                         return;
                 }
                 
-               /* Expire block if we have some random value that is lower than RANDOM_EXPIRE value */
-               if (g_random_int () < RANDOM_EXPIRE) {
-                       to_expire = block;
-                       break;
-               }
                 /* Expire block with minimum value otherwise */
                 if (block->value < min) {
                         to_expire = block;
+                       min = block->value;
                 }
                 c += sizeof (struct stat_file_block);
                 block = (struct stat_file_block *)c;
diff --git a/src/statfile_sync.c b/src/statfile_sync.c

index 5189f1ead7c9ddfc26bc8721bad5ef85b3e64861..44e34454b92d1bd76100fb18e3cd95a39cf9b9af 100644 (file)
--- a/src/statfile_sync.c
+++ b/src/statfile_sync.c
@@ -31,11 +31,6 @@
  #include "buffer.h"
  #include "statfile_sync.h"
  
-/* XXX: hardcoding this value is not very smart */
-#define MAX_SYNC_TIME 60
-#define IO_TIMEOUT 20
-
-
  enum rspamd_sync_state {
         SYNC_STATE_GREETING,
         SYNC_STATE_READ_LINE,
@@ -54,7 +49,9 @@ struct rspamd_sync_ctx {
  
         struct timeval interval;
         struct timeval io_tv;
-       gint                            sock;
+       gint sock;
+       guint32 timeout;
+       guint32 sync_interval;
         enum rspamd_sync_state state;
         gboolean is_busy;
  
@@ -141,9 +138,9 @@ parse_revision_line (struct rspamd_sync_ctx *ctx, f_str_t *in)
                         }
                 }
         }
-       
+
         /* Current value must be len value and its value must not be 0 */
-       return ((val == &ctx->new_len) && *val != 0);
+       return ((val == &ctx->new_len));
  }
  
  static                          gboolean
@@ -193,8 +190,10 @@ sync_read (f_str_t * in, void *arg)
                                 return FALSE;
                         }
                         else if (ctx->state != SYNC_STATE_QUIT) {
-                               ctx->state = SYNC_STATE_READ_REV;
-                               rspamd_set_dispatcher_policy (ctx->dispatcher, BUFFER_CHARACTER, ctx->new_len);
+                               if (ctx->new_len > 0) {
+                                       ctx->state = SYNC_STATE_READ_REV;
+                                       rspamd_set_dispatcher_policy (ctx->dispatcher, BUFFER_CHARACTER, ctx->new_len);
+                               }
                         }
                         else {
                                 /* Quit this session */
@@ -247,11 +246,13 @@ static void
  sync_timer_callback (gint fd, short what, void *ud)
  {
         struct rspamd_sync_ctx *ctx = ud;
+       guint32 jittered_interval;
         
         /* Plan new event */
         evtimer_del (&ctx->tm_ev);
-       ctx->interval.tv_sec = g_random_int_range (MAX_SYNC_TIME, MAX_SYNC_TIME * 2);
-       ctx->interval.tv_usec = 0;
+       /* Add some jittering for synchronization */
+       jittered_interval = g_random_int_range (ctx->sync_interval, ctx->sync_interval * 2);
+       msec_to_tv (jittered_interval, &ctx->interval);
         evtimer_add (&ctx->tm_ev, &ctx->interval);
         log_next_sync (ctx->st->symbol, ctx->interval.tv_sec);
         
@@ -266,8 +267,7 @@ sync_timer_callback (gint fd, short what, void *ud)
                 return;
         }
         /* Now create and activate dispatcher */
-       ctx->io_tv.tv_sec = IO_TIMEOUT;
-       ctx->io_tv.tv_usec = 0;
+       msec_to_tv (ctx->timeout, &ctx->io_tv);
         ctx->dispatcher = rspamd_create_dispatcher (ctx->sock, BUFFER_LINE, sync_read, NULL, sync_err, &ctx->io_tv, ctx);
         
         ctx->state = SYNC_STATE_GREETING;
@@ -278,17 +278,20 @@ sync_timer_callback (gint fd, short what, void *ud)
  }
  
  static gboolean
-add_statfile_watch (statfile_pool_t *pool, struct statfile *st)
+add_statfile_watch (statfile_pool_t *pool, struct statfile *st, struct config_file *cfg)
  {
         struct rspamd_sync_ctx *ctx;
+       guint32 jittered_interval;
         
         if (st->binlog->master_addr.s_addr != INADDR_NONE &&
                         st->binlog->master_addr.s_addr != INADDR_ANY) {
                 ctx = memory_pool_alloc (pool->pool, sizeof (struct rspamd_sync_ctx));
                 ctx->st = st;
+               ctx->timeout = cfg->statfile_sync_timeout;
+               ctx->sync_interval = cfg->statfile_sync_interval;
                 /* Add some jittering for synchronization */
-               ctx->interval.tv_sec = g_random_int_range (MAX_SYNC_TIME, MAX_SYNC_TIME * 2);
-               ctx->interval.tv_usec = 0;
+               jittered_interval = g_random_int_range (ctx->sync_interval, ctx->sync_interval * 2);
+               msec_to_tv (jittered_interval, &ctx->interval);
                 /* Open statfile and attach it to pool */
                 if ((ctx->real_statfile = statfile_pool_is_open (pool, st->path)) == NULL) {
                         if ((ctx->real_statfile = statfile_pool_open (pool, st->path, st->size, FALSE)) == NULL) {
@@ -331,7 +334,7 @@ start_statfile_sync (statfile_pool_t *pool, struct config_file *cfg)
                 while (l) {
                         st = l->data;
                         if (st->binlog != NULL && st->binlog->affinity == AFFINITY_SLAVE) {
-                               if (!add_statfile_watch (pool, st)) {
+                               if (!add_statfile_watch (pool, st, cfg)) {
                                         return FALSE;
                                 }
                         }
diff --git a/src/tokenizers/osb.c b/src/tokenizers/osb.c

index 5f5dfcdcd88d192b170e2caf26d0041e11b0a60d..bc57255cb50a70e59e493204777ab9bc589ec57b 100644 (file)
--- a/src/tokenizers/osb.c
+++ b/src/tokenizers/osb.c
@@ -36,55 +36,56 @@ extern const int                primes[];
  
  int
  osb_tokenize_text (struct tokenizer *tokenizer, memory_pool_t * pool, f_str_t * input, GTree ** tree,
-               gboolean save_token, gboolean is_utf)
+               gboolean save_token, gboolean is_utf, GList *exceptions)
  {
         token_node_t                   *new = NULL;
-       f_str_t                         token = { NULL, 0, 0 }, *res;
-       uint32_t                        hashpipe[FEATURE_WINDOW_SIZE], h1, h2;
-       int                             i;
-
-       /* First set all bytes of hashpipe to some common value */
-       for (i = 0; i < FEATURE_WINDOW_SIZE; i++) {
-               hashpipe[i] = 0xABCDEF;
-       }
+       f_str_t                         token = { NULL, 0, 0 };
+       guint32                         hashpipe[FEATURE_WINDOW_SIZE], h1, h2;
+       gint                            i, k = 0, l;
+       gchar                          *res;
  
         if (*tree == NULL) {
                 *tree = g_tree_new (token_node_compare_func);
                 memory_pool_add_destructor (pool, (pool_destruct_func) g_tree_destroy, *tree);
         }
  
-       while ((res = tokenizer->get_next_word (input, &token)) != NULL) {
+       while ((res = tokenizer->get_next_word (input, &token, &exceptions)) != NULL) {
                 /* Skip small words */
                 if (is_utf) {
-                       if (g_utf8_strlen (token.begin, token.len) < MIN_LEN) {
-                               continue;
-                       }
+                       l = g_utf8_strlen (token.begin, token.len);
                 }
                 else {
-                       if (token.len < MIN_LEN) {
-                               continue;
-                       }
+                       l = token.len;
                 }
+               if (l < MIN_LEN) {
+                       token.begin = res;
+                       continue;
+               }
+
                 /* Shift hashpipe */
                 for (i = FEATURE_WINDOW_SIZE - 1; i > 0; i--) {
                         hashpipe[i] = hashpipe[i - 1];
                 }
-               hashpipe[0] = fstrhash (&token);
+               hashpipe[0] = fstrhash_lowercase (&token, is_utf);
  
-               for (i = 1; i < FEATURE_WINDOW_SIZE; i++) {
-                       h1 = hashpipe[0] * primes[0] + hashpipe[i] * primes[i << 1];
-                       h2 = hashpipe[0] * primes[1] + hashpipe[i] * primes[(i << 1) - 1];
-                       new = memory_pool_alloc0 (pool, sizeof (token_node_t));
-                       new->h1 = h1;
-                       new->h2 = h2;
-                       if (save_token) {
-                               new->extra = (uintptr_t)memory_pool_fstrdup (pool, &token);
-                       }
+               if (k > FEATURE_WINDOW_SIZE) {
+                       for (i = 1; i < FEATURE_WINDOW_SIZE; i++) {
+                               h1 = hashpipe[0] * primes[0] + hashpipe[i] * primes[i << 1];
+                               h2 = hashpipe[0] * primes[1] + hashpipe[i] * primes[(i << 1) - 1];
+                               new = memory_pool_alloc0 (pool, sizeof (token_node_t));
+                               new->h1 = h1;
+                               new->h2 = h2;
+                               if (save_token) {
+                                       new->extra = (uintptr_t)memory_pool_fstrdup (pool, &token);
+                               }
  
-                       if (g_tree_lookup (*tree, new) == NULL) {
-                               g_tree_insert (*tree, new, new);
+                               if (g_tree_lookup (*tree, new) == NULL) {
+                                       g_tree_insert (*tree, new, new);
+                               }
                         }
                 }
+               k ++;
+               token.begin = res;
         }
  
         return TRUE;
diff --git a/src/tokenizers/tokenizers.c b/src/tokenizers/tokenizers.c

index 9e41a9101cc48fd9cda09736fe5360b0991856c6..be73e506d4e7fed220f68eda25c908d104a2b466 100644 (file)
--- a/src/tokenizers/tokenizers.c
+++ b/src/tokenizers/tokenizers.c
@@ -52,7 +52,7 @@ const gchar t_delimiters[255] = {
                 1, 0, 0, 1, 0, 0, 0, 0, 0, 0,
                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                 0, 0, 1, 0, 1, 1, 1, 1, 1, 0,
-               1, 1, 1, 1, 1, 0, 1, 1, 0, 0,
+               1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
                 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
                 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -103,44 +103,76 @@ token_node_compare_func (gconstpointer a, gconstpointer b)
  }
  
  /* Get next word from specified f_str_t buf */
-f_str_t                        *
-get_next_word (f_str_t * buf, f_str_t * token)
+gchar *
+get_next_word (f_str_t * buf, f_str_t * token, GList **exceptions)
  {
-       size_t                          remain;
-       guchar                         *pos;
+       gsize                           remain, pos;
+       guchar                         *p;
+       struct process_exception           *ex = NULL;
  
         if (buf == NULL) {
                 return NULL;
         }
+
+       if (*exceptions != NULL) {
+               ex = (*exceptions)->data;
+       }
+
         if (token->begin == NULL) {
-               token->begin = buf->begin;
+               if (ex != NULL) {
+                       if (ex->pos == 0) {
+                               token->begin = buf->begin + ex->len;
+                               token->len = ex->len;
+                       }
+                       else {
+                               token->begin = buf->begin;
+                               token->len = 0;
+                       }
+               }
+               else {
+                       token->begin = buf->begin;
+                       token->len = 0;
+               }
         }
  
-       token->begin = token->begin + token->len;
         token->len = 0;
  
         remain = buf->len - (token->begin - buf->begin);
         if (remain <= 0) {
                 return NULL;
         }
-       pos = token->begin;
+       pos = token->begin - buf->begin;
+       p = token->begin;
         /* Skip non delimiters symbols */
-       while (remain > 0 && t_delimiters[*pos]) {
-               token->begin++;
+       do {
+               if (ex != NULL && ex->pos == pos) {
+                       /* Go to the next exception */
+                       *exceptions = g_list_next (*exceptions);
+                       return p + ex->len + 1;
+               }
                 pos++;
+               p++;
                 remain--;
-       }
-       while (remain > 0 && !t_delimiters[*pos]) {
+       } while (remain > 0 && t_delimiters[*p]);
+
+       token->begin = p;
+
+       while (remain > 0 && !t_delimiters[*p]) {
+               if (ex != NULL && ex->pos == pos) {
+                       *exceptions = g_list_next (*exceptions);
+                       return p + ex->len + 1;
+               }
                 token->len++;
                 pos++;
                 remain--;
+               p ++;
         }
  
-       if (token->len == 0) {
+       if (remain == 0) {
                 return NULL;
         }
  
-       return token;
+       return p;
  }
  
  /* Struct to access gmime headers */
@@ -239,13 +271,13 @@ tokenize_subject (struct worker_task *task, GTree ** tree)
                 new = memory_pool_alloc (task->task_pool, sizeof (token_node_t));
                 subject.begin = task->subject;
                 subject.len = strlen (task->subject);
-               osb_tokenizer->tokenize_func (osb_tokenizer, task->task_pool, &subject, tree, FALSE, TRUE);
+               osb_tokenizer->tokenize_func (osb_tokenizer, task->task_pool, &subject, tree, FALSE, TRUE, NULL);
         }
         if ((sub = g_mime_message_get_subject (task->message)) != NULL) {
                 new = memory_pool_alloc (task->task_pool, sizeof (token_node_t));
                 subject.begin = (gchar *)sub;
                 subject.len = strlen (sub);
-               osb_tokenizer->tokenize_func (osb_tokenizer, task->task_pool, &subject, tree, FALSE, TRUE);
+               osb_tokenizer->tokenize_func (osb_tokenizer, task->task_pool, &subject, tree, FALSE, TRUE, NULL);
         }
  }
  
diff --git a/src/tokenizers/tokenizers.h b/src/tokenizers/tokenizers.h

index df5481a1fea525cf794e35350291a7329256330c..c78d90b0efc9c0089d016f5d84a3993c98e4d16a 100644 (file)
--- a/src/tokenizers/tokenizers.h
+++ b/src/tokenizers/tokenizers.h
@@ -15,17 +15,18 @@
  #define FEATURE_WINDOW_SIZE 5
  
  typedef struct token_node_s {
-       uint32_t h1;
-       uint32_t h2;
+       guint32 h1;
+       guint32 h2;
         float value;
         uintptr_t extra;
  } token_node_t;
  
  /* Common tokenizer structure */
  struct tokenizer {
-       char *name;
-       int (*tokenize_func)(struct tokenizer *tokenizer, memory_pool_t *pool, f_str_t *input, GTree **cur, gboolean save_token, gboolean is_utf);
-       f_str_t* (*get_next_word)(f_str_t *buf, f_str_t *token);
+       gchar *name;
+       gint (*tokenize_func)(struct tokenizer *tokenizer, memory_pool_t *pool, f_str_t *input,
+                       GTree **cur, gboolean save_token, gboolean is_utf, GList *exceptions);
+       gchar* (*get_next_word)(f_str_t *buf, f_str_t *token, GList **exceptions);
  };
  
  /* Compare two token nodes */
@@ -33,9 +34,10 @@ int token_node_compare_func (gconstpointer a, gconstpointer b);
  /* Get tokenizer structure by name or return NULL if this name is not found */
  struct tokenizer* get_tokenizer (char *name);
  /* Get next word from specified f_str_t buf */
-f_str_t *get_next_word (f_str_t *buf, f_str_t *token);
+gchar* get_next_word (f_str_t *buf, f_str_t *token, GList **exceptions);
  /* OSB tokenize function */
-int osb_tokenize_text (struct tokenizer *tokenizer, memory_pool_t *pool, f_str_t *input, GTree **cur, gboolean save_token, gboolean is_utf);
+int osb_tokenize_text (struct tokenizer *tokenizer, memory_pool_t *pool, f_str_t *input,
+               GTree **cur, gboolean save_token, gboolean is_utf, GList *exceptions);
  /* Common tokenizer for headers */
  int tokenize_headers (memory_pool_t *pool, struct worker_task *task, GTree **cur);
  /* Make tokens for a subject */
diff --git a/src/url.c b/src/url.c

index dbc04ffabbce19643c35617b9fdc3283b77f5242..c8895e1071d13824071ecbe60fe95cfe86fe468b 100644 (file)
--- a/src/url.c
+++ b/src/url.c
@@ -1160,6 +1160,7 @@ url_parse_text (memory_pool_t * pool, struct worker_task *task, struct mime_text
         gint                            rc;
         gchar                          *url_str = NULL, *url_start, *url_end;
         struct uri                     *new;
+       struct process_exception       *ex;
         gchar                          *p, *end, *begin;
  
  
@@ -1183,13 +1184,14 @@ url_parse_text (memory_pool_t * pool, struct worker_task *task, struct mime_text
                         if (url_try_text (pool, p, end - p, &url_start, &url_end, &url_str)) {
                                 if (url_str != NULL) {
                                         new = memory_pool_alloc0 (pool, sizeof (struct uri));
+                                       ex = memory_pool_alloc0 (pool, sizeof (struct process_exception));
                                         if (new != NULL) {
                                                 g_strstrip (url_str);
                                                 rc = parse_uri (new, url_str, pool);
                                                 if ((rc == URI_ERRNO_OK || rc == URI_ERRNO_NO_SLASHES || rc == URI_ERRNO_NO_HOST_SLASH) &&
                                                                 new->hostlen > 0) {
-                                                       new->pos = url_start - begin;
-                                                       new->len = url_end - url_start;
+                                                       ex->pos = url_start - begin;
+                                                       ex->len = url_end - url_start;
                                                         if (new->protocol == PROTOCOL_MAILTO) {
                                                                 if (!g_tree_lookup (task->emails, new)) {
                                                                         g_tree_insert (task->emails, new, new);
@@ -1200,7 +1202,7 @@ url_parse_text (memory_pool_t * pool, struct worker_task *task, struct mime_text
                                                                         g_tree_insert (task->urls, new, new);
                                                                 }
                                                         }
-                                                       part->urls_offset = g_list_prepend (part->urls_offset, new);
+                                                       part->urls_offset = g_list_prepend (part->urls_offset, ex);
                                                 }
                                                 else if (rc != URI_ERRNO_OK) {
                                                         msg_info ("extract of url '%s' failed: %s", url_str, url_strerror (rc));
@@ -1256,10 +1258,10 @@ url_try_text (memory_pool_t *pool, const gchar *begin, gsize len, gchar **start,
                                 *url_str = NULL;
                         }
                         if (start != NULL) {
-                               *start = (gchar *)pos;
+                               *start = (gchar *)m.m_begin;
                         }
                         if (fin != NULL) {
-                               *fin = (gchar *)pos + m.m_len;
+                               *fin = (gchar *)m.m_begin + m.m_len;
                         }
                         return TRUE;
                 }
diff --git a/src/url.h b/src/url.h

index 9c0812e62655bb2561adde63e5db9b40169a2010..5a8e7e1e2ea47b60405ce7118fc34ae7da353c4c 100644 (file)
--- a/src/url.h
+++ b/src/url.h
@@ -32,9 +32,6 @@ struct uri {
  
         struct uri *phished_url;
  
-       gsize pos;
-       gsize len;
-
         /* @protocollen should only be usable if @protocol is either
          * PROTOCOL_USER or an uri string should be composed. */
         guint protocollen;
author	Vsevolod Stakhov <vsevolod@rambler-co.ru>
	Fri, 24 Jun 2011 16:25:54 +0000 (20:25 +0400)
committer	Vsevolod Stakhov <vsevolod@rambler-co.ru>
	Fri, 24 Jun 2011 16:25:54 +0000 (20:25 +0400)
CMakeLists.txt		patch \| blob \| history
src/binlog.c		patch \| blob \| history
src/cfg_file.h		patch \| blob \| history
src/cfg_utils.c		patch \| blob \| history
src/cfg_xml.c		patch \| blob \| history
src/classifiers/bayes.c		patch \| blob \| history
src/controller.c		patch \| blob \| history
src/filter.c		patch \| blob \| history
src/fstring.c		patch \| blob \| history
src/fstring.h		patch \| blob \| history
src/fuzzy.c		patch \| blob \| history
src/main.h		patch \| blob \| history
src/mem_pool.c		patch \| blob \| history
src/message.c		patch \| blob \| history
src/statfile.c		patch \| blob \| history
src/statfile_sync.c		patch \| blob \| history
src/tokenizers/osb.c		patch \| blob \| history
src/tokenizers/tokenizers.c		patch \| blob \| history
src/tokenizers/tokenizers.h		patch \| blob \| history
src/url.c		patch \| blob \| history
src/url.h		patch \| blob \| history