diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2014-07-23 12:53:08 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2014-07-23 12:53:08 +0100 |
commit | fe79d8c5a39f2b717f78cc3f3ef21b3cfc46500b (patch) | |
tree | c84e6a5d4c5cd78a7a2cc3c7adbc7af5d0541682 /src/tokenizers | |
parent | e0483657ff6cf1adc828ccce457814d61fe90a0d (diff) | |
download | rspamd-fe79d8c5a39f2b717f78cc3f3ef21b3cfc46500b.tar.gz rspamd-fe79d8c5a39f2b717f78cc3f3ef21b3cfc46500b.zip |
Revert "Unify code style."
This reverts commit e0483657ff6cf1adc828ccce457814d61fe90a0d.
Diffstat (limited to 'src/tokenizers')
-rw-r--r-- | src/tokenizers/osb.c | 40 | ||||
-rw-r--r-- | src/tokenizers/tokenizers.c | 133 | ||||
-rw-r--r-- | src/tokenizers/tokenizers.h | 26 |
3 files changed, 82 insertions, 117 deletions
diff --git a/src/tokenizers/osb.c b/src/tokenizers/osb.c index faa6a9669..823e1e5b5 100644 --- a/src/tokenizers/osb.c +++ b/src/tokenizers/osb.c @@ -26,40 +26,32 @@ * OSB tokenizer */ -#include "tokenizers.h" #include <sys/types.h> +#include "tokenizers.h" /* Minimum length of token */ #define MIN_LEN 4 -extern const int primes[]; +extern const int primes[]; int -osb_tokenize_text (struct tokenizer *tokenizer, - rspamd_mempool_t * pool, - f_str_t * input, - GTree ** tree, - gboolean save_token, - gboolean is_utf, - GList *exceptions) +osb_tokenize_text (struct tokenizer *tokenizer, rspamd_mempool_t * pool, f_str_t * input, GTree ** tree, + gboolean save_token, gboolean is_utf, GList *exceptions) { - token_node_t *new = NULL; - f_str_t token = { NULL, 0, 0 }; - guint32 hashpipe[FEATURE_WINDOW_SIZE], h1, h2; - gint i, l, processed = 0; - gchar *res; + token_node_t *new = NULL; + f_str_t token = { NULL, 0, 0 }; + guint32 hashpipe[FEATURE_WINDOW_SIZE], h1, h2; + gint i, l, processed = 0; + gchar *res; if (*tree == NULL) { *tree = g_tree_new (token_node_compare_func); - rspamd_mempool_add_destructor (pool, - (rspamd_mempool_destruct_t) g_tree_destroy, - *tree); + rspamd_mempool_add_destructor (pool, (rspamd_mempool_destruct_t) g_tree_destroy, *tree); } memset (hashpipe, 0xfe, FEATURE_WINDOW_SIZE * sizeof (hashpipe[0])); - while ((res = - tokenizer->get_next_word (input, &token, &exceptions)) != NULL) { + while ((res = tokenizer->get_next_word (input, &token, &exceptions)) != NULL) { /* Skip small words */ if (is_utf) { l = g_utf8_strlen (token.begin, token.len); @@ -75,7 +67,7 @@ osb_tokenize_text (struct tokenizer *tokenizer, if (processed < FEATURE_WINDOW_SIZE) { /* Just fill a hashpipe */ hashpipe[FEATURE_WINDOW_SIZE - ++processed] = - fstrhash_lowercase (&token, is_utf); + fstrhash_lowercase (&token, is_utf); } else { /* Shift hashpipe */ @@ -83,18 +75,16 @@ osb_tokenize_text (struct tokenizer *tokenizer, hashpipe[i] = hashpipe[i - 1]; } hashpipe[0] = fstrhash_lowercase (&token, is_utf); - processed++; + processed ++; for (i = 1; i < FEATURE_WINDOW_SIZE; i++) { h1 = hashpipe[0] * primes[0] + hashpipe[i] * primes[i << 1]; - h2 = hashpipe[0] * primes[1] + hashpipe[i] * - primes[(i << 1) - 1]; + h2 = hashpipe[0] * primes[1] + hashpipe[i] * primes[(i << 1) - 1]; new = rspamd_mempool_alloc0 (pool, sizeof (token_node_t)); new->h1 = h1; new->h2 = h2; if (save_token) { - new->extra = - (uintptr_t)rspamd_mempool_fstrdup (pool, &token); + new->extra = (uintptr_t)rspamd_mempool_fstrdup (pool, &token); } if (g_tree_lookup (*tree, new) == NULL) { diff --git a/src/tokenizers/tokenizers.c b/src/tokenizers/tokenizers.c index 05901531a..448dcd53e 100644 --- a/src/tokenizers/tokenizers.c +++ b/src/tokenizers/tokenizers.c @@ -26,15 +26,15 @@ * Common tokenization functions */ +#include <sys/types.h> #include "main.h" #include "tokenizers.h" -#include <sys/types.h> -struct tokenizer tokenizers[] = { +struct tokenizer tokenizers[] = { {"osb-text", osb_tokenize_text, get_next_word}, }; -const int primes[] = { +const int primes[] = { 1, 7, 3, 13, 5, 29, @@ -48,38 +48,38 @@ const int primes[] = { }; const gchar t_delimiters[255] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, - 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, - 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, - 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, + 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, + 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0 }; -struct tokenizer * +struct tokenizer * get_tokenizer (const char *name) { - guint i; + guint i; for (i = 0; i < sizeof (tokenizers) / sizeof (tokenizers[0]); i++) { if (strcmp (tokenizers[i].name, name) == 0) { @@ -93,7 +93,7 @@ get_tokenizer (const char *name) int token_node_compare_func (gconstpointer a, gconstpointer b) { - const token_node_t *aa = a, *bb = b; + const token_node_t *aa = a, *bb = b; if (aa->h1 == bb->h1) { return aa->h2 - bb->h2; @@ -106,9 +106,9 @@ token_node_compare_func (gconstpointer a, gconstpointer b) gchar * get_next_word (f_str_t * buf, f_str_t * token, GList **exceptions) { - gsize remain, pos; - guchar *p; - struct process_exception *ex = NULL; + gsize remain, pos; + guchar *p; + struct process_exception *ex = NULL; if (buf == NULL) { return NULL; @@ -165,7 +165,7 @@ get_next_word (f_str_t * buf, f_str_t * token, GList **exceptions) token->len++; pos++; remain--; - p++; + p ++; } if (remain == 0) { @@ -177,34 +177,30 @@ get_next_word (f_str_t * buf, f_str_t * token, GList **exceptions) /* Struct to access gmime headers */ struct raw_header { - struct raw_header *next; - char *name; - char *value; + struct raw_header *next; + char *name; + char *value; }; typedef struct _GMimeHeader { - GHashTable *hash; - GHashTable *writers; - struct raw_header *headers; + GHashTable *hash; + GHashTable *writers; + struct raw_header *headers; } local_GMimeHeader; int -tokenize_headers (rspamd_mempool_t * pool, - struct rspamd_task *task, - GTree ** tree) +tokenize_headers (rspamd_mempool_t * pool, struct rspamd_task *task, GTree ** tree) { - token_node_t *new = NULL; - f_str_t headername; - f_str_t headervalue; + token_node_t *new = NULL; + f_str_t headername; + f_str_t headervalue; if (*tree == NULL) { *tree = g_tree_new (token_node_compare_func); - rspamd_mempool_add_destructor (pool, - (rspamd_mempool_destruct_t) g_tree_destroy, - *tree); + rspamd_mempool_add_destructor (pool, (rspamd_mempool_destruct_t) g_tree_destroy, *tree); } #ifndef GMIME24 - struct raw_header *h; + struct raw_header *h; h = GMIME_OBJECT (task->message)->headers->headers; while (h) { @@ -223,10 +219,10 @@ tokenize_headers (rspamd_mempool_t * pool, h = h->next; } #else - GMimeHeaderList *ls; - GMimeHeaderIter *iter; - const char *name; - const char *value; + GMimeHeaderList *ls; + GMimeHeaderIter *iter; + const char *name; + const char *value; ls = GMIME_OBJECT (task->message)->headers; iter = g_mime_header_iter_new (); @@ -258,14 +254,13 @@ tokenize_headers (rspamd_mempool_t * pool, void tokenize_subject (struct rspamd_task *task, GTree ** tree) { - f_str_t subject; - const gchar *sub; - struct tokenizer *osb_tokenizer; + f_str_t subject; + const gchar *sub; + struct tokenizer *osb_tokenizer; if (*tree == NULL) { *tree = g_tree_new (token_node_compare_func); - rspamd_mempool_add_destructor (task->task_pool, - (rspamd_mempool_destruct_t) g_tree_destroy, *tree); + rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) g_tree_destroy, *tree); } osb_tokenizer = get_tokenizer ("osb-text"); @@ -274,24 +269,12 @@ tokenize_subject (struct rspamd_task *task, GTree ** tree) if (task->subject != NULL) { subject.begin = task->subject; subject.len = strlen (task->subject); - osb_tokenizer->tokenize_func (osb_tokenizer, - task->task_pool, - &subject, - tree, - FALSE, - TRUE, - NULL); + osb_tokenizer->tokenize_func (osb_tokenizer, task->task_pool, &subject, tree, FALSE, TRUE, NULL); } if ((sub = g_mime_message_get_subject (task->message)) != NULL) { subject.begin = (gchar *)sub; subject.len = strlen (sub); - osb_tokenizer->tokenize_func (osb_tokenizer, - task->task_pool, - &subject, - tree, - FALSE, - TRUE, - NULL); + osb_tokenizer->tokenize_func (osb_tokenizer, task->task_pool, &subject, tree, FALSE, TRUE, NULL); } } diff --git a/src/tokenizers/tokenizers.h b/src/tokenizers/tokenizers.h index ce056377b..207602dc8 100644 --- a/src/tokenizers/tokenizers.h +++ b/src/tokenizers/tokenizers.h @@ -2,9 +2,9 @@ #define TOKENIZERS_H #include "config.h" +#include "mem_pool.h" #include "fstring.h" #include "main.h" -#include "mem_pool.h" /* Size for features pipe */ #define FEATURE_WINDOW_SIZE 5 @@ -19,30 +19,22 @@ typedef struct token_node_s { /* Common tokenizer structure */ struct tokenizer { gchar *name; - gint (*tokenize_func)(struct tokenizer *tokenizer, rspamd_mempool_t *pool, - f_str_t *input, - GTree **cur, gboolean save_token, gboolean is_utf, GList *exceptions); - gchar * (*get_next_word)(f_str_t *buf, f_str_t *token, GList **exceptions); + gint (*tokenize_func)(struct tokenizer *tokenizer, rspamd_mempool_t *pool, f_str_t *input, + GTree **cur, gboolean save_token, gboolean is_utf, GList *exceptions); + gchar* (*get_next_word)(f_str_t *buf, f_str_t *token, GList **exceptions); }; /* Compare two token nodes */ int token_node_compare_func (gconstpointer a, gconstpointer b); /* Get tokenizer structure by name or return NULL if this name is not found */ -struct tokenizer * get_tokenizer (const char *name); +struct tokenizer* get_tokenizer (const char *name); /* Get next word from specified f_str_t buf */ -gchar * get_next_word (f_str_t *buf, f_str_t *token, GList **exceptions); +gchar* get_next_word (f_str_t *buf, f_str_t *token, GList **exceptions); /* OSB tokenize function */ -int osb_tokenize_text (struct tokenizer *tokenizer, - rspamd_mempool_t *pool, - f_str_t *input, - GTree **cur, - gboolean save_token, - gboolean is_utf, - GList *exceptions); +int osb_tokenize_text (struct tokenizer *tokenizer, rspamd_mempool_t *pool, f_str_t *input, + GTree **cur, gboolean save_token, gboolean is_utf, GList *exceptions); /* Common tokenizer for headers */ -int tokenize_headers (rspamd_mempool_t *pool, - struct rspamd_task *task, - GTree **cur); +int tokenize_headers (rspamd_mempool_t *pool, struct rspamd_task *task, GTree **cur); /* Make tokens for a subject */ void tokenize_subject (struct rspamd_task *task, GTree ** tree); |