aboutsummaryrefslogtreecommitdiffstats
path: root/src/libutil/shingles.h
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@highsecure.ru>2014-12-17 16:44:52 +0000
committerVsevolod Stakhov <vsevolod@highsecure.ru>2014-12-17 16:44:52 +0000
commit7eb5a5745b0df7a5b05dd6beee9a41e60cefa7e7 (patch)
tree4a1134991746cafcb01efb3046b32abf59ceb369 /src/libutil/shingles.h
parent4178270ca52b46428b0aa9dc7fcf0fd7f03d8c7a (diff)
downloadrspamd-7eb5a5745b0df7a5b05dd6beee9a41e60cefa7e7.tar.gz
rspamd-7eb5a5745b0df7a5b05dd6beee9a41e60cefa7e7.zip
Add preliminary shingles version.
Diffstat (limited to 'src/libutil/shingles.h')
-rw-r--r--src/libutil/shingles.h66
1 files changed, 66 insertions, 0 deletions
diff --git a/src/libutil/shingles.h b/src/libutil/shingles.h
new file mode 100644
index 000000000..571199992
--- /dev/null
+++ b/src/libutil/shingles.h
@@ -0,0 +1,66 @@
+/* Copyright (c) 2014, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef SHINGLES_H_
+#define SHINGLES_H_
+
+#include "config.h"
+#include "mem_pool.h"
+
+#define RSPAMD_SHINGLE_SIZE 84
+
+struct rspamd_shingle {
+ guint64 hashes[RSPAMD_SHINGLE_SIZE];
+};
+
+/**
+ * Shingles filtering function
+ * @param input input array of hashes
+ * @param count number of hashes in the vector
+ * @return shingle value
+ */
+typedef guint64 (*rspamd_shingles_filter) (guint64 *input, gsize count,
+ gpointer ud);
+
+/**
+ * Generate shingles from the input of fixed size strings using lemmatizer
+ * if needed
+ * @param input array of `rspamd_fstring_t`
+ * @param key secret key used to generate shingles
+ * @param pool pool to allocate shigles array
+ * @param filter hashes filtering function
+ * @param filterd opaque data for filtering function
+ * @return shingles array
+ */
+struct rspamd_shingle* rspamd_shingles_generate (GArray *input,
+ const guchar key[16],
+ rspamd_mempool_t *pool,
+ rspamd_shingles_filter filter,
+ gpointer filterd);
+
+/**
+ * Default filtering function
+ */
+guint64 rspamd_shingles_default_filter (guint64 *input, gsize count,
+ gpointer ud);
+
+#endif /* SHINGLES_H_ */