aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@highsecure.ru> 2015-01-17 22:11:02 +0000
committer Vsevolod Stakhov <vsevolod@highsecure.ru> 2015-01-18 23:05:15 +0000
commit b90e228f87962d95b43152a5f2e8dff43bf1d83a (patch)
tree c30eb3a364dccd4ca8bd69ef08baae929d3e0d8d
parent eb9ae27c70a0b3030e6302cc065485f798a05e82 (diff)
download rspamd-b90e228f87962d95b43152a5f2e8dff43bf1d83a.tar.gz
rspamd-b90e228f87962d95b43152a5f2e8dff43bf1d83a.zip
New statistics token definition.
-rw-r--r-- src/libstat/tokenizers.h 15
-rw-r--r-- src/libstat/tokenizers/osb.c 17
-rw-r--r-- src/libstat/tokenizers/tokenizers.c 6
3 files changed, 20 insertions, 18 deletions
diff --git a/src/libstat/tokenizers.h b/src/libstat/tokenizers.h
index c0d2e8934..7943c212a 100644
--- a/src/libstat/tokenizers.h
+++ b/src/libstat/tokenizers.h
@@ -8,12 +8,19 @@
/* Size for features pipe */
#define FEATURE_WINDOW_SIZE 5
+#define MAX_DATA_LEN 64
+#define MAX_VALUES 32
-typedef struct token_node_s {
- guint32 h1;
- guint32 h2;
+struct token_result {
double value;
- uintptr_t extra;
+ struct rspamd_statfile_config *st;
+};
+
+typedef struct token_node_s {
+ guchar data[MAX_DATA_LEN];
+ guint datalen;
+ struct token_result *results;
+ guint results_len;
} token_node_t;
/* Common tokenizer structure */
diff --git a/src/libstat/tokenizers/osb.c b/src/libstat/tokenizers/osb.c
index 9dd12a8dd..4016842b6 100644
--- a/src/libstat/tokenizers/osb.c
+++ b/src/libstat/tokenizers/osb.c
@@ -83,12 +83,9 @@ osb_tokenize_text (struct tokenizer *tokenizer,
h2 = hashpipe[0] * primes[1] + hashpipe[i] *
primes[(i << 1) - 1];
new = rspamd_mempool_alloc0 (pool, sizeof (token_node_t));
- new->h1 = h1;
- new->h2 = h2;
- if (save_token) {
- new->extra =
- (uintptr_t)rspamd_mempool_fstrdup (pool, token);
- }
+ new->datalen = sizeof(gint32) * 2;
+ memcpy(new->data, &h1, sizeof(h1));
+ memcpy(new->data + sizeof(h1), &h2, sizeof(h2));
if (g_tree_lookup (*tree, new) == NULL) {
g_tree_insert (*tree, new, new);
@@ -102,11 +99,9 @@ osb_tokenize_text (struct tokenizer *tokenizer,
h1 = hashpipe[0] * primes[0] + hashpipe[i] * primes[i << 1];
h2 = hashpipe[0] * primes[1] + hashpipe[i] * primes[(i << 1) - 1];
new = rspamd_mempool_alloc0 (pool, sizeof (token_node_t));
- new->h1 = h1;
- new->h2 = h2;
- if (save_token) {
- new->extra = (uintptr_t)rspamd_mempool_fstrdup (pool, token);
- }
+ new->datalen = sizeof(gint32) * 2;
+ memcpy(new->data, &h1, sizeof(h1));
+ memcpy(new->data + sizeof(h1), &h2, sizeof(h2));
if (g_tree_lookup (*tree, new) == NULL) {
g_tree_insert (*tree, new, new);
diff --git a/src/libstat/tokenizers/tokenizers.c b/src/libstat/tokenizers/tokenizers.c
index ce221397d..7d00f693a 100644
--- a/src/libstat/tokenizers/tokenizers.c
+++ b/src/libstat/tokenizers/tokenizers.c
@@ -94,11 +94,11 @@ token_node_compare_func (gconstpointer a, gconstpointer b)
{
const token_node_t *aa = a, *bb = b;
- if (aa->h1 == bb->h1) {
- return aa->h2 - bb->h2;
+ if (aa->datalen != bb->datalen) {
+ return aa->datalen - bb->datalen;
}
- return aa->h1 - bb->h1;
+ return memcmp (aa->data, bb->data, aa->datalen);
}
/* Get next word from specified f_str_t buf */