aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@highsecure.ru>2014-12-17 17:27:57 +0000
committerVsevolod Stakhov <vsevolod@highsecure.ru>2014-12-17 17:27:57 +0000
commit7404cdceee25820f36a4002c904fdd7bb385c8f6 (patch)
tree7cdd2d3ec21df355ae87adc0c55e1ebd71ae0371
parentc63bee1078c6b8370aadaa73f0bb5f2333fd78cf (diff)
downloadrspamd-7404cdceee25820f36a4002c904fdd7bb385c8f6.tar.gz
rspamd-7404cdceee25820f36a4002c904fdd7bb385c8f6.zip
Add some tests for shingles.
-rw-r--r--test/CMakeLists.txt1
-rw-r--r--test/rspamd_shingles_test.c120
-rw-r--r--test/rspamd_test_suite.c1
-rw-r--r--test/tests.h2
4 files changed, 124 insertions, 0 deletions
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 021eb0212..4ac7b80a9 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -8,6 +8,7 @@ SET(TESTSRC rspamd_expression_test.c
rspamd_dkim_test.c
rspamd_rrd_test.c
rspamd_radix_test.c
+ rspamd_shingles_test.c
rspamd_upstream_test.c
rspamd_test_suite.c)
diff --git a/test/rspamd_shingles_test.c b/test/rspamd_shingles_test.c
new file mode 100644
index 000000000..182830fd2
--- /dev/null
+++ b/test/rspamd_shingles_test.c
@@ -0,0 +1,120 @@
+/* Copyright (c) 2014, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "main.h"
+#include "shingles.h"
+#include "fstring.h"
+#include "ottery.h"
+
+/*
+ * Overwrite the first `len` bytes at `begin` with random characters, each
+ * computed as 'a' + ottery_rand_range ('z' - 'a').
+ * No terminating NUL is written; the caller owns the buffer and tracks
+ * its length.
+ */
+static void
+generate_random_string (char *begin, size_t len)
+{
+	char *out = begin;
+	size_t remaining = len;
+
+	while (remaining > 0) {
+		*out++ = 'a' + ottery_rand_range ('z' - 'a');
+		remaining--;
+	}
+}
+
+/*
+ * Build an array of `cnt` random words for the shingles tests.
+ *
+ * Each element is an rspamd_fstring_t whose `begin` buffer is allocated with
+ * g_malloc and filled by generate_random_string; `len` and `size` are both
+ * set to the word length. Word lengths are chosen in 1..max_len (a max_len
+ * of 0 or 1 always yields one-character words, so words are never empty).
+ *
+ * The caller owns the returned GArray and every `begin` buffer inside it.
+ */
+static GArray *
+generate_fuzzy_words (gsize cnt, gsize max_len)
+{
+	GArray *res;
+	gsize i, wlen;
+	rspamd_fstring_t w;
+
+	res = g_array_sized_new (FALSE, FALSE, sizeof (rspamd_fstring_t), cnt);
+
+	for (i = 0; i < cnt; i ++) {
+		/*
+		 * ottery_rand_range (top) may return `top` itself, so the range is
+		 * reduced by one before the +1 shift; otherwise a word could be
+		 * max_len + 1 characters long.
+		 */
+		wlen = max_len > 1 ? ottery_rand_range (max_len - 1) + 1 : 1;
+
+		w.len = w.size = wlen;
+		w.begin = g_malloc (wlen);
+		generate_random_string (w.begin, wlen);
+		g_array_append_val (res, w);
+	}
+
+	return res;
+}
+
+/*
+ * Randomly replace words of `in` (an array of rspamd_fstring_t) in place.
+ *
+ * For every element one unsigned random value is drawn; when it does not
+ * exceed G_MAXUINT * prob the word's characters are regenerated with
+ * generate_random_string, keeping its length and buffer unchanged.
+ */
+static void
+permute_vector (GArray *in, gdouble prob)
+{
+	gsize pos = 0;
+	rspamd_fstring_t *word;
+
+	while (pos < in->len) {
+		gsize cur = pos++;
+
+		if (!(ottery_rand_unsigned () <= G_MAXUINT * prob)) {
+			/* This word is left as it is */
+			continue;
+		}
+
+		word = &g_array_index (in, rspamd_fstring_t, cur);
+		generate_random_string (word->begin, word->len);
+	}
+}
+
+/*
+ * Release everything allocated by generate_fuzzy_words: the `begin` buffer
+ * of every rspamd_fstring_t element and then the GArray itself.
+ * `ar` must not be used after this call.
+ */
+static void
+free_fuzzy_words (GArray *ar)
+{
+	gsize i;
+	rspamd_fstring_t *w;
+
+	for (i = 0; i < ar->len; i ++) {
+		w = &g_array_index (ar, rspamd_fstring_t, i);
+		g_free (w->begin);
+	}
+
+	/* The array and its element storage were leaked previously */
+	g_array_free (ar, TRUE);
+}
+
+/*
+ * Run one randomized shingles comparison.
+ *
+ *  - generate `cnt` random words whose length is bounded via `max_len`;
+ *  - compute shingles for them using a random 16-byte key;
+ *  - regenerate each word in place with probability `perm_factor`;
+ *  - compute shingles again with the same key and compare both results.
+ *
+ * The test asserts that the comparison result lies within 0.15 of
+ * `perm_factor`.
+ */
+static void
+test_case (gsize cnt, gsize max_len, gdouble perm_factor)
+{
+	GArray *input;
+	struct rspamd_shingle *sgl, *sgl_permuted;
+	gdouble res;
+	guchar key[16];
+
+	ottery_rand_bytes (key, sizeof (key));
+	/*
+	 * Honour the caller's parameters: the input used to be hard-coded as
+	 * (5, 100), so every test case ran on the same tiny input.
+	 */
+	input = generate_fuzzy_words (cnt, max_len);
+	sgl = rspamd_shingles_generate (input, key, NULL,
+			rspamd_shingles_default_filter, NULL);
+	permute_vector (input, perm_factor);
+	sgl_permuted = rspamd_shingles_generate (input, key, NULL,
+			rspamd_shingles_default_filter, NULL);
+
+	res = rspamd_shingles_compare (sgl, sgl_permuted);
+
+	/*
+	 * NOTE(review): this expects `res` to grow with the share of changed
+	 * words. If rspamd_shingles_compare returns a similarity (1.0 for
+	 * identical inputs) the expected value should be 1.0 - perm_factor;
+	 * confirm against shingles.h.
+	 */
+	g_assert_cmpfloat (fabs (res - perm_factor), <=, 0.15);
+
+	free_fuzzy_words (input);
+	/*
+	 * Presumably shingles generated with a NULL third argument are
+	 * g_malloc'ed and owned by the caller; verify against shingles.h.
+	 */
+	g_free (sgl);
+	g_free (sgl_permuted);
+}
+
+/*
+ * Entry point of the shingles test: runs test_case for a fixed list of
+ * (word count, maximum word length, permutation factor) combinations,
+ * in the order they are listed.
+ */
+void
+rspamd_shingles_test_func (void)
+{
+	static const struct {
+		gsize cnt;
+		gsize max_len;
+		gdouble perm_factor;
+	} cases[] = {
+		{ 5, 100, 0.5 },
+		{ 500, 100, 0.5 },
+		{ 5000, 200, 0.1 },
+		{ 5000, 100, 0 },
+		{ 5000, 100, 1.0 }
+	};
+	gsize idx;
+
+	for (idx = 0; idx < sizeof (cases) / sizeof (cases[0]); idx ++) {
+		test_case (cases[idx].cnt, cases[idx].max_len,
+				cases[idx].perm_factor);
+	}
+}
diff --git a/test/rspamd_test_suite.c b/test/rspamd_test_suite.c
index 61fde350a..0cfbd7b35 100644
--- a/test/rspamd_test_suite.c
+++ b/test/rspamd_test_suite.c
@@ -54,6 +54,7 @@ main (int argc, char **argv)
g_test_add_func ("/rspamd/dkim", rspamd_dkim_test_func);
g_test_add_func ("/rspamd/rrd", rspamd_rrd_test_func);
g_test_add_func ("/rspamd/upstream", rspamd_upstream_test_func);
+ g_test_add_func ("/rspamd/shingles", rspamd_shingles_test_func);
g_test_run ();
diff --git a/test/tests.h b/test/tests.h
index f5472bd71..bd1e5b9ac 100644
--- a/test/tests.h
+++ b/test/tests.h
@@ -37,4 +37,6 @@ void rspamd_rrd_test_func (void);
void rspamd_upstream_test_func (void);
+void rspamd_shingles_test_func (void);
+
#endif