From 7404cdceee25820f36a4002c904fdd7bb385c8f6 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Wed, 17 Dec 2014 17:27:57 +0000 Subject: [PATCH] Add some tests for shingles. --- test/CMakeLists.txt | 1 + test/rspamd_shingles_test.c | 120 ++++++++++++++++++++++++++++++++++++ test/rspamd_test_suite.c | 1 + test/tests.h | 2 + 4 files changed, 124 insertions(+) create mode 100644 test/rspamd_shingles_test.c diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 021eb0212..4ac7b80a9 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -8,6 +8,7 @@ SET(TESTSRC rspamd_expression_test.c rspamd_dkim_test.c rspamd_rrd_test.c rspamd_radix_test.c + rspamd_shingles_test.c rspamd_upstream_test.c rspamd_test_suite.c) diff --git a/test/rspamd_shingles_test.c b/test/rspamd_shingles_test.c new file mode 100644 index 000000000..182830fd2 --- /dev/null +++ b/test/rspamd_shingles_test.c @@ -0,0 +1,120 @@ +/* Copyright (c) 2014, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "main.h" +#include "shingles.h" +#include "fstring.h" +#include "ottery.h" + +static void +generate_random_string (char *begin, size_t len) +{ + gsize i; + + for (i = 0; i < len; i ++) { + begin[i] = ottery_rand_range ('z' - 'a') + 'a'; + } +} + +static GArray * +generate_fuzzy_words (gsize cnt, gsize max_len) +{ + GArray *res; + gsize i, wlen; + rspamd_fstring_t w; + + res = g_array_sized_new (FALSE, FALSE, sizeof (rspamd_fstring_t), cnt); + + for (i = 0; i < cnt; i ++) { + wlen = ottery_rand_range (max_len) + 1; + + w.len = w.size = wlen; + w.begin = g_malloc (wlen); + generate_random_string (w.begin, wlen); + g_array_append_val (res, w); + } + + return res; +} + +static void +permute_vector (GArray *in, gdouble prob) +{ + gsize i; + rspamd_fstring_t *w; + + for (i = 0; i < in->len; i ++) { + if (ottery_rand_unsigned () <= G_MAXUINT * prob) { + w = &g_array_index (in, rspamd_fstring_t, i); + generate_random_string (w->begin, w->len); + } + } +} + +static void +free_fuzzy_words (GArray *ar) +{ + gsize i; + rspamd_fstring_t *w; + + for (i = 0; i < ar->len; i ++) { + w = &g_array_index (ar, rspamd_fstring_t, i); + g_free (w->begin); + } +} + +static void +test_case (gsize cnt, gsize max_len, gdouble perm_factor) +{ + GArray *input; + struct rspamd_shingle *sgl, *sgl_permuted; + gdouble res; + guchar key[16]; + + ottery_rand_bytes (key, sizeof (key)); + input = generate_fuzzy_words (5, 100); + sgl = rspamd_shingles_generate (input, key, NULL, + rspamd_shingles_default_filter, NULL); + permute_vector (input, perm_factor); + sgl_permuted = rspamd_shingles_generate (input, key, NULL, + rspamd_shingles_default_filter, NULL); + + res = rspamd_shingles_compare (sgl, sgl_permuted); + + g_assert_cmpfloat (fabs (res - perm_factor), <=, 0.15); + + free_fuzzy_words (input); + g_free (sgl); + g_free (sgl_permuted); +} + +void +rspamd_shingles_test_func (void) +{ + test_case (5, 100, 0.5); + test_case (500, 100, 0.5); + test_case (5000, 200, 0.1); + test_case (5000, 100, 0); + test_case (5000, 100, 1.0); +} diff --git a/test/rspamd_test_suite.c b/test/rspamd_test_suite.c index 61fde350a..0cfbd7b35 100644 --- a/test/rspamd_test_suite.c +++ b/test/rspamd_test_suite.c @@ -54,6 +54,7 @@ main (int argc, char **argv) g_test_add_func ("/rspamd/dkim", rspamd_dkim_test_func); g_test_add_func ("/rspamd/rrd", rspamd_rrd_test_func); g_test_add_func ("/rspamd/upstream", rspamd_upstream_test_func); + g_test_add_func ("/rspamd/shingles", rspamd_shingles_test_func); g_test_run (); diff --git a/test/tests.h b/test/tests.h index f5472bd71..bd1e5b9ac 100644 --- a/test/tests.h +++ b/test/tests.h @@ -37,4 +37,6 @@ void rspamd_rrd_test_func (void); void rspamd_upstream_test_func (void); +void rspamd_shingles_test_func (void); + #endif -- 2.39.5