summary | refs | log | tree | commit | diff | stats
path: root/src/libutil/fuzzy.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/libutil/fuzzy.c')
-rw-r--r-- src/libutil/fuzzy.c 82
1 file changed, 39 insertions, 43 deletions
diff --git a/src/libutil/fuzzy.c b/src/libutil/fuzzy.c
index a58fee654..7e8a01ce3 100644
--- a/src/libutil/fuzzy.c
+++ b/src/libutil/fuzzy.c
@@ -24,31 +24,30 @@
#include "config.h"
+#include "mem_pool.h"
#include "fstring.h"
#include "fuzzy.h"
-#include "main.h"
-#include "mem_pool.h"
#include "message.h"
#include "url.h"
+#include "main.h"
#define ROLL_WINDOW_SIZE 9
#define MIN_FUZZY_BLOCK_SIZE 3
#define HASH_INIT 0x28021967
-static const char *b64 =
- "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+static const char *b64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
struct roll_state {
- guint32 h[3];
- gchar window[ROLL_WINDOW_SIZE];
- gint n;
+ guint32 h[3];
+ gchar window[ROLL_WINDOW_SIZE];
+ gint n;
};
-static struct roll_state rs;
+static struct roll_state rs;
/* Rolling hash function based on Adler-32 checksum */
-static guint32
+static guint32
fuzzy_roll_hash (guint c)
{
/* Check window position */
@@ -73,20 +72,19 @@ fuzzy_roll_hash (guint c)
}
/* A simple non-rolling hash, based on the FNV hash */
-static guint32
+static guint32
fuzzy_fnv_hash (guint c, guint32 hval)
{
hval ^= c;
- hval +=
- (hval << 1) + (hval << 4) + (hval << 7) + (hval << 8) + (hval << 24);
+ hval += (hval << 1) + (hval << 4) + (hval << 7) + (hval << 8) + (hval << 24);
return hval;
}
/* Calculate blocksize depending on length of input */
-static guint32
+static guint32
fuzzy_blocksize (guint32 len)
{
- guint32 nlen = MIN_FUZZY_BLOCK_SIZE;
+ guint32 nlen = MIN_FUZZY_BLOCK_SIZE;
while (nlen * (FUZZY_HASHLEN - 1) < len) {
nlen *= 2;
@@ -143,12 +141,12 @@ fuzzy_update2 (fuzzy_hash_t * h1, fuzzy_hash_t *h2, guint c)
guint32
lev_distance (gchar *s1, gint len1, gchar *s2, gint len2)
{
- gint i;
- gint *row; /* we only need to keep one row of costs */
- gint *end;
- gint half, nx;
- gchar *sx, *char2p, char1;
- gint *p, D, x, offset, c3;
+ gint i;
+ gint *row; /* we only need to keep one row of costs */
+ gint *end;
+ gint half, nx;
+ gchar *sx, *char2p, char1;
+ gint *p, D, x, offset, c3;
/* strip common prefix */
while (len1 > 0 && len2 > 0 && *s1 == *s2) {
@@ -254,13 +252,13 @@ lev_distance (gchar *s1, gint len1, gchar *s2, gint len2)
}
/* Calculate fuzzy hash for specified string */
-fuzzy_hash_t *
+fuzzy_hash_t *
fuzzy_init (f_str_t * in, rspamd_mempool_t * pool)
{
- fuzzy_hash_t *new;
- guint i, repeats = 0;
- gchar *c = in->begin, last = '\0';
- gsize real_len = 0;
+ fuzzy_hash_t *new;
+ guint i, repeats = 0;
+ gchar *c = in->begin, last = '\0';
+ gsize real_len = 0;
new = rspamd_mempool_alloc0 (pool, sizeof (fuzzy_hash_t));
bzero (&rs, sizeof (rs));
@@ -272,7 +270,7 @@ fuzzy_init (f_str_t * in, rspamd_mempool_t * pool)
repeats = 0;
}
if (!g_ascii_isspace (*c) && !g_ascii_ispunct (*c) && repeats < 3) {
- real_len++;
+ real_len ++;
}
last = *c;
c++;
@@ -303,10 +301,10 @@ fuzzy_init (f_str_t * in, rspamd_mempool_t * pool)
return new;
}
-fuzzy_hash_t *
+fuzzy_hash_t *
fuzzy_init_byte_array (GByteArray * in, rspamd_mempool_t * pool)
{
- f_str_t f;
+ f_str_t f;
f.begin = (gchar *)in->data;
f.len = in->len;
@@ -315,17 +313,15 @@ fuzzy_init_byte_array (GByteArray * in, rspamd_mempool_t * pool)
}
void
-fuzzy_init_part (struct mime_text_part *part,
- rspamd_mempool_t *pool,
- gsize max_diff)
+fuzzy_init_part (struct mime_text_part *part, rspamd_mempool_t *pool, gsize max_diff)
{
- fuzzy_hash_t *new, *new2;
- gchar *c, *end, *begin;
- gsize real_len = 0, len = part->content->len;
- GList *cur_offset;
- struct process_exception *cur_ex = NULL;
- gunichar uc;
- gboolean write_diff = FALSE;
+ fuzzy_hash_t *new, *new2;
+ gchar *c, *end, *begin;
+ gsize real_len = 0, len = part->content->len;
+ GList *cur_offset;
+ struct process_exception *cur_ex = NULL;
+ gunichar uc;
+ gboolean write_diff = FALSE;
cur_offset = part->urls_offset;
if (cur_offset != NULL) {
@@ -351,7 +347,7 @@ fuzzy_init_part (struct mime_text_part *part,
else {
uc = g_utf8_get_char (c);
if (g_unichar_isalnum (uc)) {
- real_len++;
+ real_len ++;
}
c = g_utf8_next_char (c);
}
@@ -368,7 +364,7 @@ fuzzy_init_part (struct mime_text_part *part,
}
else {
if (!g_ascii_isspace (*c) && !g_ascii_ispunct (*c)) {
- real_len++;
+ real_len ++;
}
c++;
}
@@ -454,7 +450,7 @@ fuzzy_init_part (struct mime_text_part *part,
gint
fuzzy_compare_hashes (fuzzy_hash_t * h1, fuzzy_hash_t * h2)
{
- gint res, l1, l2;
+ gint res, l1, l2;
/* If we have hashes of different size, input strings are too different */
if (h1->block_size != h2->block_size) {
@@ -497,6 +493,6 @@ fuzzy_compare_parts (struct mime_text_part *p1, struct mime_text_part *p2)
return 0;
}
-/*
- * vi:ts=4
+/*
+ * vi:ts=4
*/