diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2014-12-17 17:27:57 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2014-12-17 17:27:57 +0000 |
commit | 7404cdceee25820f36a4002c904fdd7bb385c8f6 (patch) | |
tree | 7cdd2d3ec21df355ae87adc0c55e1ebd71ae0371 /test/rspamd_shingles_test.c | |
parent | c63bee1078c6b8370aadaa73f0bb5f2333fd78cf (diff) | |
download | rspamd-7404cdceee25820f36a4002c904fdd7bb385c8f6.tar.gz rspamd-7404cdceee25820f36a4002c904fdd7bb385c8f6.zip |
Add some tests for shingles.
Diffstat (limited to 'test/rspamd_shingles_test.c')
-rw-r--r-- | test/rspamd_shingles_test.c | 120 |
1 files changed, 120 insertions, 0 deletions
diff --git a/test/rspamd_shingles_test.c b/test/rspamd_shingles_test.c new file mode 100644 index 000000000..182830fd2 --- /dev/null +++ b/test/rspamd_shingles_test.c @@ -0,0 +1,120 @@ +/* Copyright (c) 2014, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "main.h" +#include "shingles.h" +#include "fstring.h" +#include "ottery.h" + +static void +generate_random_string (char *begin, size_t len) +{ + gsize i; + + for (i = 0; i < len; i ++) { + begin[i] = ottery_rand_range ('z' - 'a') + 'a'; + } +} + +static GArray * +generate_fuzzy_words (gsize cnt, gsize max_len) +{ + GArray *res; + gsize i, wlen; + rspamd_fstring_t w; + + res = g_array_sized_new (FALSE, FALSE, sizeof (rspamd_fstring_t), cnt); + + for (i = 0; i < cnt; i ++) { + wlen = ottery_rand_range (max_len) + 1; + + w.len = w.size = wlen; + w.begin = g_malloc (wlen); + generate_random_string (w.begin, wlen); + g_array_append_val (res, w); + } + + return res; +} + +static void +permute_vector (GArray *in, gdouble prob) +{ + gsize i; + rspamd_fstring_t *w; + + for (i = 0; i < in->len; i ++) { + if (ottery_rand_unsigned () <= G_MAXUINT * prob) { + w = &g_array_index (in, rspamd_fstring_t, i); + generate_random_string (w->begin, w->len); + } + } +} + +static void +free_fuzzy_words (GArray *ar) +{ + gsize i; + rspamd_fstring_t *w; + + for (i = 0; i < ar->len; i ++) { + w = &g_array_index (ar, rspamd_fstring_t, i); + g_free (w->begin); + } +} + +static void +test_case (gsize cnt, gsize max_len, gdouble perm_factor) +{ + GArray *input; + struct rspamd_shingle *sgl, *sgl_permuted; + gdouble res; + guchar key[16]; + + ottery_rand_bytes (key, sizeof (key)); + input = generate_fuzzy_words (5, 100); + sgl = rspamd_shingles_generate (input, key, NULL, + rspamd_shingles_default_filter, NULL); + permute_vector (input, perm_factor); + sgl_permuted = rspamd_shingles_generate (input, key, NULL, + rspamd_shingles_default_filter, NULL); + + res = rspamd_shingles_compare (sgl, sgl_permuted); + + g_assert_cmpfloat (fabs (res - perm_factor), <=, 0.15); + + free_fuzzy_words (input); + g_free (sgl); + g_free (sgl_permuted); +} + +void +rspamd_shingles_test_func (void) +{ + test_case (5, 100, 0.5); + test_case (500, 100, 0.5); + test_case (5000, 200, 0.1); + test_case (5000, 100, 0); + test_case (5000, 100, 1.0); +} |