summaryrefslogtreecommitdiffstats
path: root/src/plugins/custom
diff options
context:
space:
mode:
authorcebka@lenovo-laptop <cebka@lenovo-laptop>2010-02-26 21:12:24 +0300
committercebka@lenovo-laptop <cebka@lenovo-laptop>2010-02-26 21:12:24 +0300
commit246f7ec03f58e7efce2727c9196a4031d45ae5dd (patch)
tree0591d1aaf473b1da91bdd55ab92e692851ba0a86 /src/plugins/custom
parent2cab3a9c488cb9042acf4350dc327a7dcb0c9eb9 (diff)
downloadrspamd-246f7ec03f58e7efce2727c9196a4031d45ae5dd.tar.gz
rspamd-246f7ec03f58e7efce2727c9196a4031d45ae5dd.zip
* Add custom filter for making marks for new user:
- each username is passed through the metaphone filter - then we build a prefix tree from the English letters of the metaphone output - then we search for the longest common string and output the result (how many times we have seen that string)
Diffstat (limited to 'src/plugins/custom')
-rw-r--r--src/plugins/custom/CMakeLists.txt1
-rw-r--r--src/plugins/custom/ipmark/CMakeLists.txt2
-rw-r--r--src/plugins/custom/ipmark/ipmark.c6
-rw-r--r--src/plugins/custom/regmark/CMakeLists.txt10
-rw-r--r--src/plugins/custom/regmark/metaphone.c466
-rw-r--r--src/plugins/custom/regmark/metaphone.h27
-rw-r--r--src/plugins/custom/regmark/prefix_tree.c177
-rw-r--r--src/plugins/custom/regmark/prefix_tree.h35
-rw-r--r--src/plugins/custom/regmark/regmark.c172
9 files changed, 892 insertions, 4 deletions
diff --git a/src/plugins/custom/CMakeLists.txt b/src/plugins/custom/CMakeLists.txt
index c5e1ec99f..5d076c41d 100644
--- a/src/plugins/custom/CMakeLists.txt
+++ b/src/plugins/custom/CMakeLists.txt
@@ -1 +1,2 @@
ADD_SUBDIRECTORY(ipmark)
+ADD_SUBDIRECTORY(regmark)
diff --git a/src/plugins/custom/ipmark/CMakeLists.txt b/src/plugins/custom/ipmark/CMakeLists.txt
index 42785599b..1c1b1e64c 100644
--- a/src/plugins/custom/ipmark/CMakeLists.txt
+++ b/src/plugins/custom/ipmark/CMakeLists.txt
@@ -4,7 +4,7 @@ SET(IPMARKSRC ipmark.c
../../../mem_pool.c
)
-ADD_LIBRARY(rspamd_ipmark SHARED ${IPMARKSRC})
+ADD_LIBRARY(rspamd_ipmark MODULE ${IPMARKSRC})
TARGET_LINK_LIBRARIES(rspamd_ipmark ${GLIB2_LIBRARIES})
INSTALL(TARGETS rspamd_ipmark DESTINATION lib)
diff --git a/src/plugins/custom/ipmark/ipmark.c b/src/plugins/custom/ipmark/ipmark.c
index e919cd417..9bebcab22 100644
--- a/src/plugins/custom/ipmark/ipmark.c
+++ b/src/plugins/custom/ipmark/ipmark.c
@@ -54,8 +54,8 @@ void after_connect (char **output, char **log_line, void *user_data);
void module_fin (void);
/* Internal variables */
-char *filename = NULL;
-radix_tree_t *radix = NULL;
+static char *filename = NULL;
+static radix_tree_t *radix = NULL;
/* Implementation */
@@ -259,7 +259,7 @@ parse_line (const char *line, size_t len, char **output, void *user_data)
const char *p;
char *c = ip_buf, *err_str;
struct in_addr ina;
- int state = 0, next_state, dots;
+ int state = 0, next_state = 0, dots = 0;
int16_t value;
uint32_t mask;
enum ipmark_command cmd;
diff --git a/src/plugins/custom/regmark/CMakeLists.txt b/src/plugins/custom/regmark/CMakeLists.txt
new file mode 100644
index 000000000..e6242ab90
--- /dev/null
+++ b/src/plugins/custom/regmark/CMakeLists.txt
@@ -0,0 +1,10 @@
+# Build script for the regmark plugin: plugin entry points, metaphone encoder and prefix tree
+SET(REGMARKSRC regmark.c
+ metaphone.c
+ prefix_tree.c
+)
+# MODULE (not SHARED): CMake's library type for plugins that are loaded at run time instead of being linked into other targets
+ADD_LIBRARY(rspamd_regmark MODULE ${REGMARKSRC})
+TARGET_LINK_LIBRARIES(rspamd_regmark ${GLIB2_LIBRARIES})
+# The plugin is installed into the "lib" destination
+INSTALL(TARGETS rspamd_regmark DESTINATION lib)
diff --git a/src/plugins/custom/regmark/metaphone.c b/src/plugins/custom/regmark/metaphone.c
new file mode 100644
index 000000000..81fa0544e
--- /dev/null
+++ b/src/plugins/custom/regmark/metaphone.c
@@ -0,0 +1,466 @@
+/*
+ * Copyright (c) 2009, Rambler media
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Rambler media ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Rambler BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * This is implementation of metaphone algorithm that was originally written by
+ * Michael G Schwern <schwern@pobox.com> as perl XS module
+ */
+
+/*
+ * I suppose I could have been using a character pointer instead of
+ * accessing the array directly...
+ */
+
+#include "../../../config.h"
+#include "metaphone.h"
+
+/*
+ * Letter-access helpers. They are only usable inside metaphone(): all of them
+ * read the locals `word` (input string) and `w_idx` (current position) and
+ * fold the letter to upper case.
+ *
+ * Next_Letter: the letter right after the current one. When the current
+ * letter is the last one this reads the terminating NUL.
+ */
+#define Next_Letter (g_ascii_toupper (word[w_idx+1]))
+/*
+ * The letter at the current position
+ */
+#define Curr_Letter (g_ascii_toupper(word[w_idx]))
+/*
+ * The letter n positions back, or '\0' when that would fall before the start
+ * of the word. Note that n is not parenthesized in the expansion.
+ */
+#define Look_Back_Letter(n) (w_idx >= n ? g_ascii_toupper(word[w_idx-n]) : '\0')
+/*
+ * Previous letter; '\0' at the start of the word
+ */
+#define Prev_Letter (Look_Back_Letter(1))
+/*
+ * Two letters ahead. Next_Letter is checked first so that the read never goes
+ * past the terminating NUL. Look_Ahead_Letter(n) below does the same for an
+ * arbitrary distance by walking the string with Lookahead().
+ */
+#define After_Next_Letter (Next_Letter != '\0' ? g_ascii_toupper(word[w_idx+2]) \
+ : '\0')
+#define Look_Ahead_Letter(n) (g_ascii_toupper(Lookahead(word+w_idx, n)))
+/* Single-character output codes that metaphone() emits for the 'sh' and 'th' sounds */
+#define SH 'X'
+#define TH '0'
+
+/*
+ * Per-letter flag table, indexed by (upper-case letter - 'A') and read through
+ * the ENCODE() macro in metaphone.h. Bit meanings, as tested by the macros
+ * there: 1 = vowel, 2 = passed through unchanged, 4 = affects a following H,
+ * 8 = makes C and G soft, 16 = prevents GH from becoming F.
+ */
+/* Stolen directly out of the book... */
+char _codes[26] = {
+ 1,16,4,16,9,2,4,16,9,2,0,2,2,2,1,4,0,2,4,4,1,0,0,0,8,0
+/* a b c d e f g h i j k l m n o p q r s t u v w x y z */
+};
+
+
+/*
+ * Return the character `how_far` positions after `word`, or '\0' when the
+ * string ends before that position. The string is walked one character at a
+ * time, so the terminating NUL is never stepped over.
+ */
+static char
+Lookahead (char *word, int how_far)
+{
+    int pos = 0;
+
+    while (pos < how_far && word[pos] != '\0') {
+        pos++;
+    }
+
+    /* pos is now either how_far or the index of the terminator */
+    return word[pos];
+}
+
+
+/*
+ * Output helpers. Like the letter-access macros they only work inside
+ * metaphone(): they use its `phoned_word` out-parameter and the `p_idx` local.
+ *
+ * Phonize: append one character to the output and advance p_idx. No bounds
+ * check is done here; the expansion is a brace block, not an expression.
+ */
+#define Phonize(c) {(*phoned_word)[p_idx++] = c;}
+/*
+ * Write the terminating NUL at the current end of the output (p_idx is not advanced)
+ */
+#define End_Phoned_Word {(*phoned_word)[p_idx] = '\0';}
+/*
+ * Number of characters written to the output so far
+ */
+#define Phone_Len (p_idx)
+
+/*
+ * True when c is a 'break' in the word, i.e. anything that is not an ASCII
+ * letter (including the terminating NUL)
+ */
+#define Isbreak(c) (!g_ascii_isalpha(c))
+
+
+/*
+ * Compute the metaphone (phonetic) code of a word.
+ *
+ * word          NUL-terminated input. Case is folded and non-letters are skipped.
+ * max_phonemes  Encoding stops once this many phonemes have been produced;
+ *               0 means "no limit". Because X is encoded as the pair KS and the
+ *               first phoneme is emitted unconditionally, the result may be one
+ *               character longer than the limit.
+ * phoned_word   Out parameter: receives a g_malloc()-ed, NUL-terminated string
+ *               that the caller must release with g_free().
+ *
+ * Returns TRUE on success. Returns FALSE when word or phoned_word is NULL or
+ * max_phonemes is negative; *phoned_word is set to NULL in that case (when
+ * phoned_word itself is usable).
+ */
+gboolean
+metaphone (char *word, int max_phonemes, char **phoned_word)
+{
+    int w_idx = 0; /* point in the phonization we're at. */
+    int p_idx = 0; /* end of the phoned phrase */
+    gsize capacity;
+
+    /*-- Parameter checks --*/
+    if (phoned_word == NULL) {
+        return FALSE;
+    }
+    *phoned_word = NULL;
+    if (word == NULL || max_phonemes < 0) {
+        return FALSE;
+    }
+
+    /*-- Allocate memory for our phoned_phrase --*/
+    if (max_phonemes == 0) {
+        /*
+         * No limit: every input letter yields at most two output
+         * characters (X -> KS), plus one byte for the terminator.
+         */
+        capacity = 2 * strlen (word) + 1;
+    }
+    else {
+        /*
+         * The limit is tested before a letter is encoded, so the last
+         * step may overshoot it by one character (X -> KS); the second
+         * extra byte holds the terminator.
+         */
+        capacity = (gsize) max_phonemes + 2;
+    }
+    *phoned_word = g_malloc (capacity);
+
+    /*-- The first phoneme has to be processed specially. --*/
+    /*
+     * Find our first letter
+     */
+    for (; ! g_ascii_isalpha (Curr_Letter); w_idx++) {
+        /*
+         * On the off chance we were given nothing but crap...
+         */
+        if (Curr_Letter == '\0') {
+            End_Phoned_Word;
+            return TRUE;
+        }
+    }
+
+    switch (Curr_Letter) {
+    /*
+     * AE becomes E
+     */
+    case 'A':
+        if (Next_Letter == 'E') {
+            Phonize ('E');
+            w_idx += 2;
+        }
+        /*
+         * Remember, preserve vowels at the beginning
+         */
+        else {
+            Phonize ('A');
+            w_idx++;
+        }
+        break;
+    /*
+     * [GKP]N becomes N
+     */
+    case 'G':
+    case 'K':
+    case 'P':
+        if (Next_Letter == 'N') {
+            Phonize ('N');
+            w_idx += 2;
+        }
+        break;
+    /*
+     * WH becomes H, WR becomes R, W is kept if followed by a vowel
+     */
+    case 'W':
+        if (Next_Letter == 'H' || Next_Letter == 'R') {
+            Phonize (Next_Letter);
+            w_idx += 2;
+        }
+        else if (isvowel (Next_Letter)) {
+            Phonize ('W');
+            w_idx += 2;
+        }
+        /*
+         * else ignore
+         */
+        break;
+    /*
+     * X becomes S
+     */
+    case 'X':
+        Phonize ('S');
+        w_idx++;
+        break;
+    /*
+     * Vowels are kept (A was handled above)
+     */
+    case 'E':
+    case 'I':
+    case 'O':
+    case 'U':
+        Phonize (Curr_Letter);
+        w_idx++;
+        break;
+    default:
+        /*
+         * do nothing
+         */
+        break;
+    }
+
+    /*
+     * On to the metaphoning
+     */
+    for (; Curr_Letter != '\0' &&
+            (max_phonemes == 0 || Phone_Len < max_phonemes); w_idx++) {
+        /*
+         * How many letters to skip because an earlier encoding handled
+         * multiple letters
+         */
+        unsigned short int skip_letter = 0;
+
+        /*
+         * Ignore non-alphas
+         */
+        if (! g_ascii_isalpha (Curr_Letter)) {
+            continue;
+        }
+
+        /*
+         * Drop duplicates, except CC
+         */
+        if (Curr_Letter == Prev_Letter && Curr_Letter != 'C') {
+            continue;
+        }
+
+        switch (Curr_Letter) {
+        /*
+         * B -> B unless in MB
+         */
+        case 'B':
+            if (Prev_Letter != 'M') {
+                Phonize ('B');
+            }
+            break;
+        /*
+         * 'sh' if -CIA- or -CH, but not SCH, except SCHW. (SCHW is
+         * handled in S) S if -CI-, -CE- or -CY- dropped if -SCI-,
+         * SCE-, -SCY- (handled in S) else K
+         */
+        case 'C':
+            if (MAKESOFT (Next_Letter)) { /* C[IEY] */
+                if (After_Next_Letter == 'A' && Next_Letter == 'I') { /* CIA */
+                    Phonize (SH);
+                }
+                else if (Prev_Letter == 'S') {
+                    /*
+                     * SC[IEY]: dropped
+                     */
+                }
+                else {
+                    Phonize ('S');
+                }
+            }
+            else if (Next_Letter == 'H') {
+#ifndef USE_TRADITIONAL_METAPHONE
+                if (After_Next_Letter == 'R' || Prev_Letter == 'S') { /* Christ, School */
+                    Phonize ('K');
+                }
+                else {
+                    Phonize (SH);
+                }
+#else
+                Phonize (SH);
+#endif
+                skip_letter++;
+            }
+            else {
+                Phonize ('K');
+            }
+            break;
+        /*
+         * J if in -DGE-, -DGI- or -DGY- else T
+         */
+        case 'D':
+            if (Next_Letter == 'G' && MAKESOFT (After_Next_Letter)) {
+                Phonize ('J');
+                skip_letter++;
+            }
+            else {
+                Phonize ('T');
+            }
+            break;
+        /*
+         * F if in -GH and not B--GH, D--GH, -H--GH, -H---GH else
+         * dropped if -GNED, -GN, else dropped if -DGE-, -DGI- or
+         * -DGY- (handled in D) else J if in -GE-, -GI, -GY and not GG
+         * else K
+         */
+        case 'G':
+            if (Next_Letter == 'H') {
+                if (!(NOGHTOF (Look_Back_Letter (3)) ||
+                        Look_Back_Letter (4) == 'H')) {
+                    Phonize ('F');
+                    skip_letter++;
+                }
+                /*
+                 * else silent
+                 */
+            }
+            else if (Next_Letter == 'N') {
+                if (Isbreak (After_Next_Letter) ||
+                        (After_Next_Letter == 'E' &&
+                         Look_Ahead_Letter (3) == 'D')) {
+                    /*
+                     * dropped
+                     */
+                }
+                else {
+                    Phonize ('K');
+                }
+            }
+            else if (MAKESOFT (Next_Letter) && Prev_Letter != 'G') {
+                Phonize ('J');
+            }
+            else {
+                Phonize ('K');
+            }
+            break;
+        /*
+         * H if before a vowel and not after C,G,P,S,T
+         */
+        case 'H':
+            if (isvowel (Next_Letter) && !AFFECTH (Prev_Letter)) {
+                Phonize ('H');
+            }
+            break;
+        /*
+         * dropped if after C else K
+         */
+        case 'K':
+            if (Prev_Letter != 'C') {
+                Phonize ('K');
+            }
+            break;
+        /*
+         * F if before H else P
+         */
+        case 'P':
+            if (Next_Letter == 'H') {
+                Phonize ('F');
+            }
+            else {
+                Phonize ('P');
+            }
+            break;
+        /*
+         * K
+         */
+        case 'Q':
+            Phonize ('K');
+            break;
+        /*
+         * 'sh' in -SH-, -SIO- or -SIA- or -SCHW- else S
+         */
+        case 'S':
+            if (Next_Letter == 'I' &&
+                    (After_Next_Letter == 'O' || After_Next_Letter == 'A')) {
+                Phonize (SH);
+            }
+            else if (Next_Letter == 'H') {
+                Phonize (SH);
+                skip_letter++;
+            }
+#ifndef USE_TRADITIONAL_METAPHONE
+            else if (Next_Letter == 'C' &&
+                    Look_Ahead_Letter (2) == 'H' &&
+                    Look_Ahead_Letter (3) == 'W') {
+                Phonize (SH);
+                skip_letter += 2;
+            }
+#endif
+            else {
+                Phonize ('S');
+            }
+            break;
+        /*
+         * 'sh' in -TIA- or -TIO- else 'th' before H else T
+         */
+        case 'T':
+            if (Next_Letter == 'I' &&
+                    (After_Next_Letter == 'O' || After_Next_Letter == 'A')) {
+                Phonize (SH);
+            }
+            else if (Next_Letter == 'H') {
+                Phonize (TH);
+                skip_letter++;
+            }
+            else {
+                Phonize ('T');
+            }
+            break;
+        /*
+         * F
+         */
+        case 'V':
+            Phonize ('F');
+            break;
+        /*
+         * W before a vowel, else dropped
+         */
+        case 'W':
+            if (isvowel (Next_Letter)) {
+                Phonize ('W');
+            }
+            break;
+        /*
+         * KS (the only case that emits two characters)
+         */
+        case 'X':
+            Phonize ('K');
+            Phonize ('S');
+            break;
+        /*
+         * Y if followed by a vowel
+         */
+        case 'Y':
+            if (isvowel (Next_Letter)) {
+                Phonize ('Y');
+            }
+            break;
+        /*
+         * S
+         */
+        case 'Z':
+            Phonize ('S');
+            break;
+        /*
+         * No transformation
+         */
+        case 'F':
+        case 'J':
+        case 'L':
+        case 'M':
+        case 'N':
+        case 'R':
+            Phonize (Curr_Letter);
+            break;
+        default:
+            /*
+             * nothing (non-initial vowels are dropped)
+             */
+            break;
+        } /* END SWITCH */
+
+        w_idx += skip_letter;
+    } /* END FOR */
+
+    End_Phoned_Word;
+
+    return TRUE;
+}
diff --git a/src/plugins/custom/regmark/metaphone.h b/src/plugins/custom/regmark/metaphone.h
new file mode 100644
index 000000000..4de3292f9
--- /dev/null
+++ b/src/plugins/custom/regmark/metaphone.h
@@ -0,0 +1,27 @@
+#ifndef RSPAMD_METAPHONE_H
+#define RSPAMD_METAPHONE_H
+
+#include "../../../config.h"
+
+/*
+ * Flag bits of a letter taken from the _codes table, or 0 for anything that is
+ * not an ASCII letter. Note that c is evaluated twice and that _codes itself
+ * is not declared in this header (it is defined in metaphone.c).
+ */
+#define ENCODE(c) (g_ascii_isalpha(c) ? _codes[((g_ascii_toupper(c)) - 'A')] : 0)
+
+#define isvowel(c) (ENCODE(c) & 1) /* AEIOU */
+
+/* These letters are passed through unchanged */
+#define NOCHANGE(c) (ENCODE(c) & 2) /* FJLMNR */
+
+/* These form diphthongs when preceding H */
+#define AFFECTH(c) (ENCODE(c) & 4) /* CGPST */
+
+/* These make C and G soft */
+#define MAKESOFT(c) (ENCODE(c) & 8) /* EIY */
+
+/* These prevent GH from becoming F */
+#define NOGHTOF(c) (ENCODE(c) & 16) /* BDH */
+/*
+ * With this macro undefined metaphone.c compiles its non-traditional rules:
+ * CH -> K after S or before R, and SCHW -> 'sh'.
+ */
+#undef USE_TRADITIONAL_METAPHONE
+/*
+ * Encode word phonetically. The result is a newly g_malloc()-ed string stored
+ * in *phoned_word (regmark.c releases it with g_free()); max_phonemes == 0
+ * means the caller gives no explicit limit.
+ */
+gboolean metaphone (char *word, int max_phonemes, char **phoned_word);
+
+#endif
diff --git a/src/plugins/custom/regmark/prefix_tree.c b/src/plugins/custom/regmark/prefix_tree.c
new file mode 100644
index 000000000..0e2920616
--- /dev/null
+++ b/src/plugins/custom/regmark/prefix_tree.c
@@ -0,0 +1,177 @@
+/*
+ * Copyright (c) 2009, Rambler media
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Rambler media ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Rambler BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "../../../config.h"
+#include "prefix_tree.h"
+
+
+/*
+ * Create an empty tree with the given number of levels. All counters start
+ * at zero. Returns NULL when levels is not positive; release the result with
+ * prefix_tree_free().
+ */
+prefix_tree_t*
+prefix_tree_new (int levels)
+{
+    prefix_tree_t *tree;
+
+    if (levels < 1) {
+        return NULL;
+    }
+
+    tree = g_new (prefix_tree_t, 1);
+    /* g_new0 zero-fills, so every letter slot of every level starts at 0 */
+    tree->nodes = g_new0 (prefix_tree_level_t, levels);
+    tree->levels = levels;
+
+    return tree;
+}
+
+/*
+ * Shared worker behind add_string(), check_string() and their *_longest
+ * variants.
+ *
+ * Character i of input selects slot (letter - 'A') on level i; characters
+ * outside 'A'..'Z' are skipped but still consume a level. The walk ends at the
+ * end of input or after tree->levels characters.
+ *
+ *  - A slot that is still 0 is set to 1 (or, when read_only is set, ends the
+ *    walk immediately); it never contributes to the result.
+ *  - A non-zero slot on a level greater than skip_levels is incremented
+ *    (unless read_only) and contributes its value to the result: the maximum
+ *    of all such values, or with get_longest the value of the last one seen.
+ *
+ * Returns 0 for a NULL tree or when no slot contributed.
+ */
+static uintptr_t
+add_string_common (prefix_tree_t *tree, const char *input, int skip_levels, gboolean read_only, gboolean get_longest)
+{
+    uintptr_t best = 0;
+    int depth;
+
+    if (tree == NULL) {
+        return 0;
+    }
+
+    for (depth = 0; input[depth] != '\0' && depth < tree->levels; depth++) {
+        const char ch = input[depth];
+        prefix_tree_leaf_t *leaf;
+
+        if (ch < 'A' || ch > 'Z') {
+            /* Not a tree letter: nothing to count on this level */
+            continue;
+        }
+        leaf = &tree->nodes[depth].leafs[ch - 'A'];
+
+        if (leaf->data == 0) {
+            /* First time this letter is seen on this level */
+            if (read_only) {
+                return best;
+            }
+            leaf->data = 1;
+            continue;
+        }
+
+        if (depth <= skip_levels) {
+            /* Levels up to and including skip_levels are neither counted nor reported */
+            continue;
+        }
+
+        if (! read_only) {
+            leaf->data ++;
+        }
+        if (get_longest || best < leaf->data) {
+            best = leaf->data;
+        }
+    }
+
+    return best;
+}
+
+/*
+ * Record input in the tree and return the largest counter found on the levels
+ * after skip_levels (0 if there is none).
+ */
+uintptr_t
+add_string (prefix_tree_t *tree, const char *input, int skip_levels)
+{
+    const gboolean read_only = FALSE, get_longest = FALSE;
+
+    return add_string_common (tree, input, skip_levels, read_only, get_longest);
+}
+
+/*
+ * Look input up without modifying the tree and return the largest counter
+ * found on the levels after skip_levels; the lookup stops at the first letter
+ * that has never been recorded.
+ */
+uintptr_t
+check_string (prefix_tree_t *tree, const char *input, int skip_levels)
+{
+    const gboolean read_only = TRUE, get_longest = FALSE;
+
+    return add_string_common (tree, input, skip_levels, read_only, get_longest);
+}
+
+/*
+ * Record input in the tree and return the counter of the deepest already
+ * known letter on the levels after skip_levels (0 if there is none).
+ */
+uintptr_t
+add_string_longest (prefix_tree_t *tree, const char *input, int skip_levels)
+{
+    const gboolean read_only = FALSE, get_longest = TRUE;
+
+    return add_string_common (tree, input, skip_levels, read_only, get_longest);
+}
+
+/*
+ * Look input up without modifying the tree and return the counter of the
+ * deepest known letter on the levels after skip_levels; the lookup stops at
+ * the first letter that has never been recorded.
+ */
+uintptr_t
+check_string_longest (prefix_tree_t *tree, const char *input, int skip_levels)
+{
+    const gboolean read_only = TRUE, get_longest = TRUE;
+
+    return add_string_common (tree, input, skip_levels, read_only, get_longest);
+}
+
+/*
+ * Release a tree created by prefix_tree_new() or load_prefix_tree().
+ * Passing NULL is allowed and does nothing.
+ */
+void
+prefix_tree_free (prefix_tree_t *tree)
+{
+    if (tree == NULL) {
+        return;
+    }
+
+    g_free (tree->nodes);
+    g_free (tree);
+}
+
+/*
+ * Write the tree to filename, replacing any previous content.
+ *
+ * File layout: the level count as a native int, followed by the raw array of
+ * levels. The format therefore depends on the sizes and byte order of the
+ * machine that wrote it.
+ *
+ * Returns TRUE only when everything was written and the file was closed
+ * without error; FALSE for a NULL/empty tree, a NULL filename or any I/O
+ * failure (including a short write). The descriptor is closed on every path.
+ */
+gboolean
+save_prefix_tree (prefix_tree_t *tree, const char *filename)
+{
+    int fd;
+    size_t nodes_size;
+    gboolean ok = FALSE;
+
+    if (tree == NULL || tree->nodes == NULL || tree->levels <= 0 || filename == NULL) {
+        return FALSE;
+    }
+
+    if ((fd = open (filename, O_RDWR | O_TRUNC | O_CREAT, S_IWUSR | S_IRUSR)) == -1) {
+        return FALSE;
+    }
+
+    nodes_size = (size_t) tree->levels * sizeof (prefix_tree_level_t);
+
+    /* A short write would leave a file that cannot be loaded, so treat it as an error */
+    if (write (fd, &tree->levels, sizeof (int)) == (ssize_t) sizeof (int) &&
+            write (fd, tree->nodes, nodes_size) == (ssize_t) nodes_size) {
+        ok = TRUE;
+    }
+
+    /* close() can report a deferred write error */
+    if (close (fd) == -1) {
+        ok = FALSE;
+    }
+
+    return ok;
+}
+
+/*
+ * Read a tree previously written by save_prefix_tree().
+ *
+ * The file is opened read-only: it is neither created nor truncated.
+ * Returns a new tree (release it with prefix_tree_free()), or NULL when
+ * filename is NULL, the file cannot be opened, it is shorter than its header
+ * promises, or the stored level count is not positive. The descriptor is
+ * closed on every path.
+ *
+ * NOTE(review): the level count is otherwise trusted, so a corrupt file with
+ * a huge count leads to a correspondingly huge allocation request.
+ */
+prefix_tree_t*
+load_prefix_tree (const char *filename)
+{
+    int fd, levels = 0;
+    size_t nodes_size;
+    prefix_tree_t *tree = NULL;
+
+    if (filename == NULL) {
+        return NULL;
+    }
+
+    if ((fd = open (filename, O_RDONLY)) == -1) {
+        return NULL;
+    }
+
+    /* read() returns 0 at end of file, so compare against the expected size, not -1 */
+    if (read (fd, &levels, sizeof (int)) == (ssize_t) sizeof (int) && levels > 0) {
+        tree = prefix_tree_new (levels);
+    }
+
+    if (tree != NULL) {
+        nodes_size = (size_t) tree->levels * sizeof (prefix_tree_level_t);
+        if (read (fd, tree->nodes, nodes_size) != (ssize_t) nodes_size) {
+            /* Truncated or unreadable body */
+            prefix_tree_free (tree);
+            tree = NULL;
+        }
+    }
+
+    close (fd);
+
+    return tree;
+}
diff --git a/src/plugins/custom/regmark/prefix_tree.h b/src/plugins/custom/regmark/prefix_tree.h
new file mode 100644
index 000000000..30580d612
--- /dev/null
+++ b/src/plugins/custom/regmark/prefix_tree.h
@@ -0,0 +1,35 @@
+#ifndef RSPAMD_PREFIX_TREE_H
+#define RSPAMD_PREFIX_TREE_H
+
+#include "../../../config.h"
+/* Number of slots per level: one for each letter 'A'..'Z' */
+#define LETTERS_NUMBER 26
+/* One slot: a counter; 0 means the letter has not been seen on this level */
+typedef struct prefix_tree_leaf_s {
+ uintptr_t data;
+} prefix_tree_leaf_t;
+/* One level: the counters of all letters at one character position */
+typedef struct prefix_tree_level_s {
+ struct prefix_tree_leaf_s leafs[LETTERS_NUMBER];
+} prefix_tree_level_t;
+/*
+ * The whole structure: an array of `levels` levels. Levels are independent
+ * arrays; there are no parent/child links between slots.
+ */
+typedef struct prefix_tree_s {
+ prefix_tree_level_t *nodes;
+ int levels;
+} prefix_tree_t;
+/* Create an empty tree; returns NULL when levels is not positive */
+prefix_tree_t* prefix_tree_new (int levels);
+/*
+ * Record input ('A'..'Z' characters, one per level) and return the largest
+ * counter found on the levels after skip_levels.
+ */
+uintptr_t add_string (prefix_tree_t *tree, const char *input, int skip_levels);
+/* Same lookup as add_string() but without modifying the tree */
+uintptr_t check_string (prefix_tree_t *tree, const char *input, int skip_levels);
+/*
+ * Variants that return the counter of the deepest known letter instead of the
+ * largest counter.
+ */
+uintptr_t add_string_longest (prefix_tree_t *tree, const char *input, int skip_levels);
+uintptr_t check_string_longest (prefix_tree_t *tree, const char *input, int skip_levels);
+/* Release a tree; NULL is accepted */
+void prefix_tree_free (prefix_tree_t *tree);
+/*
+ * Persistence: save writes the level count followed by the raw level array to
+ * filename; load is its counterpart and returns NULL on failure.
+ */
+gboolean save_prefix_tree (prefix_tree_t *tree, const char *filename);
+prefix_tree_t* load_prefix_tree (const char *filename);
+
+#endif
diff --git a/src/plugins/custom/regmark/regmark.c b/src/plugins/custom/regmark/regmark.c
new file mode 100644
index 000000000..c8cede47a
--- /dev/null
+++ b/src/plugins/custom/regmark/regmark.c
@@ -0,0 +1,172 @@
+/*
+ * Copyright (c) 2009, Rambler media
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Rambler media ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Rambler BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * This plugin can be used as a registration spam tester. How it works:
+ *
+ * 1) get the string that identifies a username, for example alexeyssad
+ * 2) do metaphone normalization
+ * 3) break this string into pieces: (al) (ex) (ey) (ss) (ad)
+ * 4) go through the tree and increment each node value
+ * 5) find the biggest number of occurrences in some level of the tree, for example:
+ * (root)
+ * |
+ * l1: (al:4) (hu:5) (tt:9)
+ * l2: (ex:4) (is:5) (hh:9)
+ * l3: (ey:3) ....
+ * l4: (ss:2)
+ * l5: (ad:1)
+ * then, if we require a minimum of l3 (6 symbols of the original string), the maximum number would be 3, so we
+ * have seen the name alexey 3 times before.
+ *
+ * So the input line should look like this:
+ *
+ * <string> level
+ */
+
+#include "../../../config.h"
+#include "../../../cfg_file.h"
+#include "metaphone.h"
+#include "prefix_tree.h"
+
+#define MAX_LEVELS 32
+
+/* Exported functions */
+void module_init (struct config_file *cfg);
+void* before_connect (void);
+gboolean parse_line (const char *line, size_t len, char **output, void *user_data);
+void after_connect (char **output, char **log_line, void *user_data);
+void module_fin (void);
+
+/* Internal variables */
+static char *filename = NULL;
+static prefix_tree_t *tree = NULL;
+
+/* Implementation */
+
+/*
+ * Find option opt_name of module module_name in the parsed configuration.
+ * Returns the option's value string as stored in cfg (not a copy), or NULL
+ * when the module has no options or none of them matches.
+ */
+char *
+get_module_opt (struct config_file *cfg, char *module_name, char *opt_name)
+{
+    GList *node;
+
+    /* The hash table maps a module name to the list of its options */
+    for (node = g_hash_table_lookup (cfg->modules_opts, module_name);
+            node != NULL; node = g_list_next (node)) {
+        struct module_opt *opt = node->data;
+
+        if (strcmp (opt->param, opt_name) == 0) {
+            return opt->value;
+        }
+    }
+
+    return NULL;
+}
+
+/*
+ * Plugin start-up: remember the data file configured as option "file" of
+ * module "regmark" and load the tree from it. When no file is configured or
+ * it cannot be loaded, start with an empty tree of MAX_LEVELS levels.
+ */
+void
+module_init (struct config_file *cfg)
+{
+    char *value;
+    prefix_tree_t *loaded = NULL;
+
+    /*
+     * The option must be looked up under this module's own name: reading
+     * the "ipmark" section would make regmark load, and on shutdown
+     * overwrite, the ipmark plugin's data file.
+     */
+    if (cfg && (value = get_module_opt (cfg, "regmark", "file")) != NULL) {
+        filename = g_strdup (value);
+    }
+
+    if (filename) {
+        loaded = load_prefix_tree (filename);
+    }
+    if (loaded == NULL) {
+        loaded = prefix_tree_new (MAX_LEVELS);
+    }
+
+    tree = loaded;
+}
+
+void *
+before_connect (void)
+{
+ /*
+  * In fact we do not need any session data, so just return NULL.
+  * NOTE(review): presumably the value returned here is what the host later
+  * passes as user_data to parse_line() and after_connect() - confirm against
+  * the plugin loader.
+  */
+ return NULL;
+}
+
+/*
+ * Plugin shutdown: write the tree back to the configured file (if any), then
+ * release the file name and the tree and reset both to NULL.
+ */
+void
+module_fin (void)
+{
+    if (filename != NULL) {
+        /* Best effort: the result of the save is not checked */
+        save_prefix_tree (tree, filename);
+    }
+
+    /* Both release functions accept NULL, so no further checks are needed */
+    g_free (filename);
+    filename = NULL;
+
+    prefix_tree_free (tree);
+    tree = NULL;
+}
+
+/*
+ * Handle one request line of the form "<username>[ <level>]".
+ *
+ * line       request text; only the first len bytes are used, it does not
+ *            have to be NUL-terminated
+ * output     receives a newly allocated reply: "OK: <count>" when the tree
+ *            reported a non-zero count for the name, "ERR" otherwise
+ * user_data  unused
+ *
+ * The username is everything up to the first whitespace; it is converted with
+ * metaphone() and recorded in the tree. <level> is the number of leading
+ * levels to skip; when it is missing or not a positive number the length of
+ * the username is used.
+ * NOTE(review): with that default the skipped prefix is usually at least as
+ * long as the metaphone code, so the reply is "ERR" - confirm this is intended.
+ *
+ * Always returns TRUE.
+ */
+gboolean
+parse_line (const char *line, size_t len, char **output, void *user_data)
+{
+    const char *p = line, *end;
+    char *name, *level_str, *metaname = NULL;
+    long levels;
+    uintptr_t res = 0;
+
+    if (line != NULL) {
+        end = line + len;
+
+        /* The name ends at the first whitespace or at the end of the line */
+        while (p < end && ! g_ascii_isspace (*p)) {
+            p ++;
+        }
+
+        if (p > line) {
+            name = g_strndup (line, p - line);
+
+            /* Skip the separator and stop exactly on the first character of the level */
+            while (p < end && g_ascii_isspace (*p)) {
+                p ++;
+            }
+
+            /* strtol needs a terminator, which line is not guaranteed to have */
+            level_str = g_strndup (p, end - p);
+            levels = strtol (level_str, NULL, 10);
+            g_free (level_str);
+            if (levels <= 0 || levels > G_MAXINT) {
+                levels = strlen (name);
+            }
+
+            if (metaphone (name, 0, &metaname) && metaname != NULL) {
+                res = add_string (tree, metaname, (int) levels);
+            }
+
+            g_free (metaname);
+            g_free (name);
+        }
+    }
+
+    /* Exactly one reply string is allocated per call */
+    if (res != 0) {
+        *output = g_strdup_printf ("OK: %u", (unsigned int)res);
+    }
+    else {
+        *output = g_strdup ("ERR");
+    }
+
+    return TRUE;
+}
+
+
+void after_connect (char **output, char **log_line, void *user_data)
+{
+ /*
+  * Placeholder: this plugin has nothing to do here, so none of the
+  * arguments is read and neither *output nor *log_line is written.
+  */
+ return;
+}