From d70fb7a4a7938936295a1ec263eba607adb26f69 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Tue, 6 Oct 2015 16:14:29 +0100 Subject: [PATCH] Fix statistics. --- src/libserver/protocol.c | 4 ++-- src/libserver/task.c | 12 +++++------ src/libstat/learn_cache/sqlite3_cache.c | 4 ++-- src/libstat/stat_process.c | 4 ++-- src/libstat/tokenizers/osb.c | 4 ++-- src/libstat/tokenizers/tokenizers.c | 27 ++++++++++++------------- src/libstat/tokenizers/tokenizers.h | 2 +- src/libutil/fstring.c | 6 +++--- src/libutil/fstring.h | 2 +- 9 files changed, 32 insertions(+), 33 deletions(-) diff --git a/src/libserver/protocol.c b/src/libserver/protocol.c index 360239f34..221db1513 100644 --- a/src/libserver/protocol.c +++ b/src/libserver/protocol.c @@ -270,11 +270,11 @@ rspamd_protocol_handle_url (struct rspamd_task *task, if (res == NULL) { /* Treat the whole query as path */ - task->msg.start = msg->url->str + u.field_data[UF_QUERY].off; + task->msg.begin = msg->url->str + u.field_data[UF_QUERY].off; task->msg.len = u.field_data[UF_QUERY].len; } else { - task->msg.start = rspamd_mempool_strdup (task->task_pool, res->str); + task->msg.begin = rspamd_mempool_strdup (task->task_pool, res->str); task->msg.len = res->len; } diff --git a/src/libserver/task.c b/src/libserver/task.c index fc22f784f..6d0eca2a2 100644 --- a/src/libserver/task.c +++ b/src/libserver/task.c @@ -249,7 +249,7 @@ rspamd_task_unmapper (gpointer ud) { struct rspamd_task *task = ud; - munmap ((void *)task->msg.start, task->msg.len); + munmap ((void *)task->msg.begin, task->msg.len); } gboolean @@ -271,7 +271,7 @@ rspamd_task_load_message (struct rspamd_task *task, if (task->flags & RSPAMD_TASK_FLAG_FILE) { g_assert (task->msg.len > 0); - r = rspamd_strlcpy (filepath, task->msg.start, + r = rspamd_strlcpy (filepath, task->msg.begin, MIN (sizeof (filepath), task->msg.len + 1)); rspamd_decode_url (filepath, filepath, r + 1); @@ -311,14 +311,14 @@ rspamd_task_load_message (struct rspamd_task *task, } close (fd); - task->msg.start = map; + task->msg.begin = map; task->msg.len = st.st_size; rspamd_mempool_add_destructor (task->task_pool, rspamd_task_unmapper, task); } else { debug_task ("got input of length %z", task->msg.len); - task->msg.start = start; + task->msg.begin = start; task->msg.len = len; if (task->msg.len == 0) { @@ -343,7 +343,7 @@ rspamd_task_load_message (struct rspamd_task *task, if (control_len > 0) { parser = ucl_parser_new (UCL_PARSER_KEY_LOWERCASE); - if (!ucl_parser_add_chunk (parser, task->msg.start, control_len)) { + if (!ucl_parser_add_chunk (parser, task->msg.begin, control_len)) { msg_warn_task ("processing of control chunk failed: %s", ucl_parser_get_error (parser)); ucl_parser_free (parser); @@ -355,7 +355,7 @@ rspamd_task_load_message (struct rspamd_task *task, ucl_object_unref (control_obj); } - task->msg.start += control_len; + task->msg.begin += control_len; task->msg.len -= control_len; } } diff --git a/src/libstat/learn_cache/sqlite3_cache.c b/src/libstat/learn_cache/sqlite3_cache.c index 6eabaefda..987f6cb0c 100644 --- a/src/libstat/learn_cache/sqlite3_cache.c +++ b/src/libstat/learn_cache/sqlite3_cache.c @@ -247,7 +247,7 @@ rspamd_stat_cache_sqlite3_process (struct rspamd_task *task, struct rspamd_stat_sqlite3_ctx *ctx = (struct rspamd_stat_sqlite3_ctx *)c; struct mime_text_part *part; blake2b_state st; - rspamd_fstring_t *word; + rspamd_ftok_t *word; guchar out[BLAKE2B_OUTBYTES]; guint i, j; @@ -259,7 +259,7 @@ rspamd_stat_cache_sqlite3_process (struct rspamd_task *task, if (part->words != NULL) { for (j = 0; j < part->words->len; j ++) { - word = &g_array_index (part->words, rspamd_fstring_t, j); + word = &g_array_index (part->words, rspamd_ftok_t, j); blake2b_update (&st, word->begin, word->len); } } diff --git a/src/libstat/stat_process.c b/src/libstat/stat_process.c index a6f5d31ae..a1cbe9c33 100644 --- a/src/libstat/stat_process.c +++ b/src/libstat/stat_process.c @@ -52,7 +52,7 @@ rspamd_stat_tokenize_header (struct rspamd_task *task, const gchar *name, const gchar *prefix, GArray *ar) { struct raw_header *rh, *cur; - rspamd_fstring_t str; + rspamd_ftok_t str; rh = g_hash_table_lookup (task->raw_headers, name); @@ -89,7 +89,7 @@ rspamd_stat_tokenize_parts_metadata (struct rspamd_task *task, struct mime_text_part *tp; GList *cur; GArray *ar; - rspamd_fstring_t elt; + rspamd_ftok_t elt; guint i; ar = g_array_sized_new (FALSE, FALSE, sizeof (elt), 4); diff --git a/src/libstat/tokenizers/osb.c b/src/libstat/tokenizers/osb.c index 690a7ab5b..7744e2883 100644 --- a/src/libstat/tokenizers/osb.c +++ b/src/libstat/tokenizers/osb.c @@ -232,7 +232,7 @@ rspamd_tokenizer_osb (struct rspamd_tokenizer_runtime *rt, const gchar *prefix) { rspamd_token_t *new = NULL; - rspamd_fstring_t *token; + rspamd_ftok_t *token; struct rspamd_osb_tokenizer_config *osb_cf; guint64 *hashpipe, cur, seed; guint32 h1, h2; @@ -259,7 +259,7 @@ rspamd_tokenizer_osb (struct rspamd_tokenizer_runtime *rt, memset (hashpipe, 0xfe, window_size * sizeof (hashpipe[0])); for (w = 0; w < input->len; w ++) { - token = &g_array_index (input, rspamd_fstring_t, w); + token = &g_array_index (input, rspamd_ftok_t, w); if (osb_cf->ht == RSPAMD_OSB_HASH_COMPAT) { cur = rspamd_fstrhash_lc (token, is_utf); diff --git a/src/libstat/tokenizers/tokenizers.c b/src/libstat/tokenizers/tokenizers.c index 07e7a1f45..439c639a6 100644 --- a/src/libstat/tokenizers/tokenizers.c +++ b/src/libstat/tokenizers/tokenizers.c @@ -30,8 +30,8 @@ #include "tokenizers.h" #include "stat_internal.h" -typedef gboolean (*token_get_function) (rspamd_fstring_t * buf, gchar **pos, - rspamd_fstring_t * token, +typedef gboolean (*token_get_function) (rspamd_ftok_t * buf, gchar const **pos, + rspamd_ftok_t * token, GList **exceptions, gboolean is_utf, gsize *rl, gboolean check_signature); const gchar t_delimiters[255] = { @@ -77,12 +77,12 @@ token_node_compare_func (gconstpointer a, gconstpointer b) /* Get next word from specified f_str_t buf */ static gboolean -rspamd_tokenizer_get_word_compat (rspamd_fstring_t * buf, - gchar **cur, rspamd_fstring_t * token, +rspamd_tokenizer_get_word_compat (rspamd_ftok_t * buf, + gchar const **cur, rspamd_ftok_t * token, GList **exceptions, gboolean is_utf, gsize *rl, gboolean unused) { gsize remain, pos; - guchar *p; + const gchar *p; struct process_exception *ex = NULL; if (buf == NULL) { @@ -134,11 +134,11 @@ rspamd_tokenizer_get_word_compat (rspamd_fstring_t * buf, pos++; p++; remain--; - } while (remain > 0 && t_delimiters[*p]); + } while (remain > 0 && t_delimiters[(guchar)*p]); token->begin = p; - while (remain > 0 && !t_delimiters[*p]) { + while (remain > 0 && !t_delimiters[(guchar)*p]) { if (ex != NULL && ex->pos == pos) { *exceptions = g_list_next (*exceptions); *cur = p + ex->len; @@ -169,13 +169,13 @@ rspamd_tokenizer_get_word_compat (rspamd_fstring_t * buf, } static gboolean -rspamd_tokenizer_get_word (rspamd_fstring_t * buf, - gchar **cur, rspamd_fstring_t * token, +rspamd_tokenizer_get_word (rspamd_ftok_t * buf, + gchar const **cur, rspamd_ftok_t * token, GList **exceptions, gboolean is_utf, gsize *rl, gboolean check_signature) { gsize remain, pos, siglen = 0; - gchar *p, *next_p, *sig = NULL; + const gchar *p, *next_p, *sig = NULL; gunichar uc; guint processed = 0; struct process_exception *ex = NULL; @@ -292,8 +292,8 @@ rspamd_tokenize_text (gchar *text, gsize len, gboolean is_utf, gsize min_len, GList *exceptions, gboolean compat, gboolean check_signature) { - rspamd_fstring_t token, buf; - gchar *pos = NULL; + rspamd_ftok_t token, buf; + const gchar *pos = NULL; gsize l; GArray *res; GList *cur = exceptions; @@ -305,7 +305,6 @@ rspamd_tokenize_text (gchar *text, gsize len, gboolean is_utf, buf.begin = text; buf.len = len; - buf.size = buf.len; token.begin = NULL; token.len = 0; @@ -316,7 +315,7 @@ rspamd_tokenize_text (gchar *text, gsize len, gboolean is_utf, func = rspamd_tokenizer_get_word; } - res = g_array_sized_new (FALSE, FALSE, sizeof (rspamd_fstring_t), 128); + res = g_array_sized_new (FALSE, FALSE, sizeof (rspamd_ftok_t), 128); while (func (&buf, &pos, &token, &cur, is_utf, &l, FALSE)) { if (l == 0 || (min_len > 0 && l < min_len)) { diff --git a/src/libstat/tokenizers/tokenizers.h b/src/libstat/tokenizers/tokenizers.h index 7b01d9fe8..4689d1cc6 100644 --- a/src/libstat/tokenizers/tokenizers.h +++ b/src/libstat/tokenizers/tokenizers.h @@ -33,7 +33,7 @@ struct rspamd_stat_tokenizer { gint token_node_compare_func (gconstpointer a, gconstpointer b); -/* Tokenize text into array of words (rspamd_fstring_t type) */ +/* Tokenize text into array of words (rspamd_ftok_t type) */ GArray * rspamd_tokenize_text (gchar *text, gsize len, gboolean is_utf, gsize min_len, GList *exceptions, gboolean compat, gboolean check_signature); diff --git a/src/libutil/fstring.c b/src/libutil/fstring.c index ba580cd8b..6a1304423 100644 --- a/src/libutil/fstring.c +++ b/src/libutil/fstring.c @@ -183,7 +183,7 @@ fstrhash_c (gchar c, guint32 hval) * Return hash value for a string */ guint32 -rspamd_fstrhash_lc (const rspamd_fstring_t * str, gboolean is_utf) +rspamd_fstrhash_lc (const rspamd_ftok_t * str, gboolean is_utf) { gsize i; guint32 j, hval; @@ -195,11 +195,11 @@ rspamd_fstrhash_lc (const rspamd_fstring_t * str, gboolean is_utf) return 0; } - p = str->str; + p = str->begin; hval = str->len; if (is_utf) { - while (end < str->str + str->len) { + while (end < str->begin + str->len) { if (!g_utf8_validate (p, str->len, &end)) { return rspamd_fstrhash_lc (str, FALSE); } diff --git a/src/libutil/fstring.h b/src/libutil/fstring.h index 0d257633b..02b92bea3 100644 --- a/src/libutil/fstring.h +++ b/src/libutil/fstring.h @@ -86,7 +86,7 @@ char * rspamd_fstring_cstr (const rspamd_fstring_t *str); /* * Return fast hash value for fixed string converted to lowercase */ -guint32 rspamd_fstrhash_lc (const rspamd_fstring_t *str, gboolean is_utf); +guint32 rspamd_fstrhash_lc (const rspamd_ftok_t *str, gboolean is_utf); /** * Return true if two strings are equal -- 2.39.5