diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2014-07-23 12:57:31 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2014-07-23 12:57:31 +0100 |
commit | 379055dbbb4af997b4d3ffb161d447872d7ca357 (patch) | |
tree | 3774553d470f93e12ddeb454aad9b3b607cf8918 /src/tokenizers | |
parent | 602ae7a0b7e215ba2677131b8fdc70abc156b3ca (diff) | |
download | rspamd-379055dbbb4af997b4d3ffb161d447872d7ca357.tar.gz rspamd-379055dbbb4af997b4d3ffb161d447872d7ca357.zip |
Unify style without sorting headers.
Diffstat (limited to 'src/tokenizers')
-rw-r--r-- | src/tokenizers/osb.c | 38 | ||||
-rw-r--r-- | src/tokenizers/tokenizers.c | 131 | ||||
-rw-r--r-- | src/tokenizers/tokenizers.h | 24 |
3 files changed, 114 insertions, 79 deletions
diff --git a/src/tokenizers/osb.c b/src/tokenizers/osb.c index 823e1e5b5..b74441eca 100644 --- a/src/tokenizers/osb.c +++ b/src/tokenizers/osb.c @@ -32,26 +32,34 @@ /* Minimum length of token */ #define MIN_LEN 4 -extern const int primes[]; +extern const int primes[]; int -osb_tokenize_text (struct tokenizer *tokenizer, rspamd_mempool_t * pool, f_str_t * input, GTree ** tree, - gboolean save_token, gboolean is_utf, GList *exceptions) +osb_tokenize_text (struct tokenizer *tokenizer, + rspamd_mempool_t * pool, + f_str_t * input, + GTree ** tree, + gboolean save_token, + gboolean is_utf, + GList *exceptions) { - token_node_t *new = NULL; - f_str_t token = { NULL, 0, 0 }; - guint32 hashpipe[FEATURE_WINDOW_SIZE], h1, h2; - gint i, l, processed = 0; - gchar *res; + token_node_t *new = NULL; + f_str_t token = { NULL, 0, 0 }; + guint32 hashpipe[FEATURE_WINDOW_SIZE], h1, h2; + gint i, l, processed = 0; + gchar *res; if (*tree == NULL) { *tree = g_tree_new (token_node_compare_func); - rspamd_mempool_add_destructor (pool, (rspamd_mempool_destruct_t) g_tree_destroy, *tree); + rspamd_mempool_add_destructor (pool, + (rspamd_mempool_destruct_t) g_tree_destroy, + *tree); } memset (hashpipe, 0xfe, FEATURE_WINDOW_SIZE * sizeof (hashpipe[0])); - while ((res = tokenizer->get_next_word (input, &token, &exceptions)) != NULL) { + while ((res = + tokenizer->get_next_word (input, &token, &exceptions)) != NULL) { /* Skip small words */ if (is_utf) { l = g_utf8_strlen (token.begin, token.len); @@ -67,7 +75,7 @@ osb_tokenize_text (struct tokenizer *tokenizer, rspamd_mempool_t * pool, f_str_t if (processed < FEATURE_WINDOW_SIZE) { /* Just fill a hashpipe */ hashpipe[FEATURE_WINDOW_SIZE - ++processed] = - fstrhash_lowercase (&token, is_utf); + fstrhash_lowercase (&token, is_utf); } else { /* Shift hashpipe */ @@ -75,16 +83,18 @@ osb_tokenize_text (struct tokenizer *tokenizer, rspamd_mempool_t * pool, f_str_t hashpipe[i] = hashpipe[i - 1]; } hashpipe[0] = fstrhash_lowercase (&token, is_utf); - processed ++; + processed++; for (i = 1; i < FEATURE_WINDOW_SIZE; i++) { h1 = hashpipe[0] * primes[0] + hashpipe[i] * primes[i << 1]; - h2 = hashpipe[0] * primes[1] + hashpipe[i] * primes[(i << 1) - 1]; + h2 = hashpipe[0] * primes[1] + hashpipe[i] * + primes[(i << 1) - 1]; new = rspamd_mempool_alloc0 (pool, sizeof (token_node_t)); new->h1 = h1; new->h2 = h2; if (save_token) { - new->extra = (uintptr_t)rspamd_mempool_fstrdup (pool, &token); + new->extra = + (uintptr_t)rspamd_mempool_fstrdup (pool, &token); } if (g_tree_lookup (*tree, new) == NULL) { diff --git a/src/tokenizers/tokenizers.c b/src/tokenizers/tokenizers.c index 448dcd53e..eb7a489e5 100644 --- a/src/tokenizers/tokenizers.c +++ b/src/tokenizers/tokenizers.c @@ -30,11 +30,11 @@ #include "main.h" #include "tokenizers.h" -struct tokenizer tokenizers[] = { +struct tokenizer tokenizers[] = { {"osb-text", osb_tokenize_text, get_next_word}, }; -const int primes[] = { +const int primes[] = { 1, 7, 3, 13, 5, 29, @@ -48,38 +48,38 @@ const int primes[] = { }; const gchar t_delimiters[255] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, - 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, - 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, - 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, + 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, + 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0 }; -struct tokenizer * +struct tokenizer * get_tokenizer (const char *name) { - guint i; + guint i; for (i = 0; i < sizeof (tokenizers) / sizeof (tokenizers[0]); i++) { if (strcmp (tokenizers[i].name, name) == 0) { @@ -93,7 +93,7 @@ get_tokenizer (const char *name) int token_node_compare_func (gconstpointer a, gconstpointer b) { - const token_node_t *aa = a, *bb = b; + const token_node_t *aa = a, *bb = b; if (aa->h1 == bb->h1) { return aa->h2 - bb->h2; @@ -106,9 +106,9 @@ token_node_compare_func (gconstpointer a, gconstpointer b) gchar * get_next_word (f_str_t * buf, f_str_t * token, GList **exceptions) { - gsize remain, pos; - guchar *p; - struct process_exception *ex = NULL; + gsize remain, pos; + guchar *p; + struct process_exception *ex = NULL; if (buf == NULL) { return NULL; @@ -165,7 +165,7 @@ get_next_word (f_str_t * buf, f_str_t * token, GList **exceptions) token->len++; pos++; remain--; - p ++; + p++; } if (remain == 0) { @@ -177,30 +177,34 @@ get_next_word (f_str_t * buf, f_str_t * token, GList **exceptions) /* Struct to access gmime headers */ struct raw_header { - struct raw_header *next; - char *name; - char *value; + struct raw_header *next; + char *name; + char *value; }; typedef struct _GMimeHeader { - GHashTable *hash; - GHashTable *writers; - struct raw_header *headers; + GHashTable *hash; + GHashTable *writers; + struct raw_header *headers; } local_GMimeHeader; int -tokenize_headers (rspamd_mempool_t * pool, struct rspamd_task *task, GTree ** tree) +tokenize_headers (rspamd_mempool_t * pool, + struct rspamd_task *task, + GTree ** tree) { - token_node_t *new = NULL; - f_str_t headername; - f_str_t headervalue; + token_node_t *new = NULL; + f_str_t headername; + f_str_t headervalue; if (*tree == NULL) { *tree = g_tree_new (token_node_compare_func); - rspamd_mempool_add_destructor (pool, (rspamd_mempool_destruct_t) g_tree_destroy, *tree); + rspamd_mempool_add_destructor (pool, + (rspamd_mempool_destruct_t) g_tree_destroy, + *tree); } #ifndef GMIME24 - struct raw_header *h; + struct raw_header *h; h = GMIME_OBJECT (task->message)->headers->headers; while (h) { @@ -219,10 +223,10 @@ tokenize_headers (rspamd_mempool_t * pool, struct rspamd_task *task, GTree ** tr h = h->next; } #else - GMimeHeaderList *ls; - GMimeHeaderIter *iter; - const char *name; - const char *value; + GMimeHeaderList *ls; + GMimeHeaderIter *iter; + const char *name; + const char *value; ls = GMIME_OBJECT (task->message)->headers; iter = g_mime_header_iter_new (); @@ -254,13 +258,14 @@ tokenize_headers (rspamd_mempool_t * pool, struct rspamd_task *task, GTree ** tr void tokenize_subject (struct rspamd_task *task, GTree ** tree) { - f_str_t subject; - const gchar *sub; - struct tokenizer *osb_tokenizer; + f_str_t subject; + const gchar *sub; + struct tokenizer *osb_tokenizer; if (*tree == NULL) { *tree = g_tree_new (token_node_compare_func); - rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) g_tree_destroy, *tree); + rspamd_mempool_add_destructor (task->task_pool, + (rspamd_mempool_destruct_t) g_tree_destroy, *tree); } osb_tokenizer = get_tokenizer ("osb-text"); @@ -269,12 +274,24 @@ tokenize_subject (struct rspamd_task *task, GTree ** tree) if (task->subject != NULL) { subject.begin = task->subject; subject.len = strlen (task->subject); - osb_tokenizer->tokenize_func (osb_tokenizer, task->task_pool, &subject, tree, FALSE, TRUE, NULL); + osb_tokenizer->tokenize_func (osb_tokenizer, + task->task_pool, + &subject, + tree, + FALSE, + TRUE, + NULL); } if ((sub = g_mime_message_get_subject (task->message)) != NULL) { subject.begin = (gchar *)sub; subject.len = strlen (sub); - osb_tokenizer->tokenize_func (osb_tokenizer, task->task_pool, &subject, tree, FALSE, TRUE, NULL); + osb_tokenizer->tokenize_func (osb_tokenizer, + task->task_pool, + &subject, + tree, + FALSE, + TRUE, + NULL); } } diff --git a/src/tokenizers/tokenizers.h b/src/tokenizers/tokenizers.h index 207602dc8..883f38058 100644 --- a/src/tokenizers/tokenizers.h +++ b/src/tokenizers/tokenizers.h @@ -19,22 +19,30 @@ typedef struct token_node_s { /* Common tokenizer structure */ struct tokenizer { gchar *name; - gint (*tokenize_func)(struct tokenizer *tokenizer, rspamd_mempool_t *pool, f_str_t *input, - GTree **cur, gboolean save_token, gboolean is_utf, GList *exceptions); - gchar* (*get_next_word)(f_str_t *buf, f_str_t *token, GList **exceptions); + gint (*tokenize_func)(struct tokenizer *tokenizer, rspamd_mempool_t *pool, + f_str_t *input, + GTree **cur, gboolean save_token, gboolean is_utf, GList *exceptions); + gchar * (*get_next_word)(f_str_t *buf, f_str_t *token, GList **exceptions); }; /* Compare two token nodes */ int token_node_compare_func (gconstpointer a, gconstpointer b); /* Get tokenizer structure by name or return NULL if this name is not found */ -struct tokenizer* get_tokenizer (const char *name); +struct tokenizer * get_tokenizer (const char *name); /* Get next word from specified f_str_t buf */ -gchar* get_next_word (f_str_t *buf, f_str_t *token, GList **exceptions); +gchar * get_next_word (f_str_t *buf, f_str_t *token, GList **exceptions); /* OSB tokenize function */ -int osb_tokenize_text (struct tokenizer *tokenizer, rspamd_mempool_t *pool, f_str_t *input, - GTree **cur, gboolean save_token, gboolean is_utf, GList *exceptions); +int osb_tokenize_text (struct tokenizer *tokenizer, + rspamd_mempool_t *pool, + f_str_t *input, + GTree **cur, + gboolean save_token, + gboolean is_utf, + GList *exceptions); /* Common tokenizer for headers */ -int tokenize_headers (rspamd_mempool_t *pool, struct rspamd_task *task, GTree **cur); +int tokenize_headers (rspamd_mempool_t *pool, + struct rspamd_task *task, + GTree **cur); /* Make tokens for a subject */ void tokenize_subject (struct rspamd_task *task, GTree ** tree); |