for (i = 0; i < tokens->len; i++) {
tok = g_ptr_array_index (tokens, i);
- memcpy (&h1, tok->data, sizeof (h1));
- memcpy (&h2, tok->data + sizeof (h1), sizeof (h2));
+ memcpy (&h1, (guchar *)&tok->data, sizeof (h1));
+ memcpy (&h2, ((guchar *)&tok->data) + sizeof (h1), sizeof (h2));
tok->values[id] = rspamd_mmaped_file_get_block (mf, h1, h2);
}
for (i = 0; i < tokens->len; i++) {
tok = g_ptr_array_index (tokens, i);
- memcpy (&h1, tok->data, sizeof (h1));
- memcpy (&h2, tok->data + sizeof (h1), sizeof (h2));
+ memcpy (&h1, (guchar *)&tok->data, sizeof (h1));
+ memcpy (&h2, ((guchar *)&tok->data) + sizeof (h1), sizeof (h2));
rspamd_mmaped_file_set_block (task->task_pool, mf, h1, h2,
tok->values[id]);
}
rspamd_token_t *tok;
gchar n0[64], n1[64];
guint i, l0, l1, larg0, larg1;
- guint64 num;
g_assert (tokens != NULL);
for (i = 0; i < tokens->len; i ++) {
tok = g_ptr_array_index (tokens, i);
- memcpy (&num, tok->data, sizeof (num));
if (learn) {
rspamd_printf_fstring (&out, ""
larg0, arg0,
larg1, arg1);
- l0 = rspamd_snprintf (n0, sizeof (n0), "%uL", num);
+ l0 = rspamd_snprintf (n0, sizeof (n0), "%uL", tok->data);
if (intvals) {
l1 = rspamd_snprintf (n1, sizeof (n1), "%L",
"%s\r\n", l0, n0, l1, n1);
}
else {
- l0 = rspamd_snprintf (n0, sizeof (n0), "%uL", num);
+ l0 = rspamd_snprintf (n0, sizeof (n0), "%uL", tok->data);
rspamd_printf_fstring (&out, ""
"$%d\r\n"
"%s\r\n", l0, n0);
{
struct rspamd_stat_sqlite3_db *bk;
struct rspamd_stat_sqlite3_rt *rt = p;
- gint64 iv = 0, idx;
+ gint64 iv = 0;
guint i;
rspamd_token_t *tok;
}
}
- memcpy (&idx, tok->data, sizeof (idx));
-
if (rspamd_sqlite3_run_prstmt (task->task_pool, bk->sqlite, bk->prstmt,
RSPAMD_STAT_BACKEND_GET_TOKEN,
- idx, rt->user_id, rt->lang_id, &iv) == SQLITE_OK) {
+ tok->data, rt->user_id, rt->lang_id, &iv) == SQLITE_OK) {
tok->values[id] = iv;
}
else {
{
struct rspamd_stat_sqlite3_db *bk;
struct rspamd_stat_sqlite3_rt *rt = p;
- gint64 iv = 0, idx;
+ gint64 iv = 0;
guint i;
rspamd_token_t *tok;
}
iv = tok->values[id];
- memcpy (&idx, tok->data, sizeof (idx));
if (rspamd_sqlite3_run_prstmt (task->task_pool, bk->sqlite, bk->prstmt,
RSPAMD_STAT_BACKEND_SET_TOKEN,
- idx, rt->user_id, rt->lang_id, iv) != SQLITE_OK) {
+ tok->data, rt->user_id, rt->lang_id, iv) != SQLITE_OK) {
rspamd_sqlite3_run_prstmt (task->task_pool, bk->sqlite, bk->prstmt,
RSPAMD_STAT_BACKEND_TRANSACTION_ROLLBACK);
bk->in_transaction = FALSE;
cl->processed_tokens ++;
if (tok->t1 && tok->t2) {
- msg_debug_bayes ("token <%*s:%*s>: weight: %f, total_count: %L, "
+ msg_debug_bayes ("token %uL <%*s:%*s>: weight: %f, total_count: %L, "
"spam_count: %L, ham_count: %L,"
"spam_prob: %.3f, ham_prob: %.3f, "
"bayes_spam_prob: %.3f, bayes_ham_prob: %.3f, "
"current spam prob: %.3f, current ham prob: %.3f",
+ tok->data,
(int) tok->t1->len, tok->t1->begin,
(int) tok->t2->len, tok->t2->begin,
fw, total_count, spam_count, ham_count,
cl->spam_prob, cl->ham_prob);
}
else {
- msg_debug_bayes ("token <?:?>: weight: %f, total_count: %L, "
+ msg_debug_bayes ("token %uL <?:?>: weight: %f, total_count: %L, "
"spam_count: %L, ham_count: %L,"
"spam_prob: %.3f, ham_prob: %.3f, "
"bayes_spam_prob: %.3f, bayes_ham_prob: %.3f, "
"current spam prob: %.3f, current ham prob: %.3f",
+ tok->data,
fw, total_count, spam_count, ham_count,
spam_prob, ham_prob,
bayes_spam_prob, bayes_ham_prob,
gboolean unlearn,
GError **err)
{
- guint i, j;
+ guint i, j, total_cnt, spam_cnt, ham_cnt;
gint id;
struct rspamd_statfile *st;
rspamd_token_t *tok;
incrementing = ctx->cfg->flags & RSPAMD_FLAG_CLASSIFIER_INCREMENTING_BACKEND;
for (i = 0; i < tokens->len; i++) {
+ total_cnt = 0;
+ spam_cnt = 0;
+ ham_cnt = 0;
tok = g_ptr_array_index (tokens, i);
for (j = 0; j < ctx->statfiles_ids->len; j++) {
else {
tok->values[id]++;
}
- }
- else if (tok->values[id] > 0 && unlearn) {
- /* Unlearning */
- if (incrementing) {
- tok->values[id] = -1;
+
+ total_cnt += tok->values[id];
+
+ if (st->stcf->is_spam) {
+ spam_cnt += tok->values[id];
}
else {
- tok->values[id]--;
+ ham_cnt += tok->values[id];
}
}
- else if (incrementing) {
- tok->values[id] = 0;
+ else {
+ if (tok->values[id] > 0 && unlearn) {
+ /* Unlearning */
+ if (incrementing) {
+ tok->values[id] = -1;
+ }
+ else {
+ tok->values[id]--;
+ }
+
+ if (st->stcf->is_spam) {
+ spam_cnt += tok->values[id];
+ }
+ else {
+ ham_cnt += tok->values[id];
+ }
+ total_cnt += tok->values[id];
+ }
+ else if (incrementing) {
+ tok->values[id] = 0;
+ }
}
}
+
+ if (tok->t1 && tok->t2) {
+ msg_debug_bayes ("token %uL <%*s:%*s>: window: %d, total_count: %d, "
+ "spam_count: %d, ham_count: %d",
+ tok->data,
+ (int) tok->t1->len, tok->t1->begin,
+ (int) tok->t2->len, tok->t2->begin,
+ tok->window_idx, total_cnt, spam_cnt, ham_cnt);
+ }
+ else {
+ msg_debug_bayes ("token %uL <?:?>: window: %d, total_count: %d, "
+ "spam_count: %d, ham_count: %d",
+ tok->data,
+ tok->window_idx, total_cnt, spam_cnt, ham_cnt);
+ }
}
return TRUE;
-}
+}
\ No newline at end of file
for (i = 0; i < tokens->len; i ++) {
tok = g_ptr_array_index (tokens, i);
- v = 0;
- memcpy (&v, tok->data, MIN (sizeof (v), tok->datalen));
+ v = tok->data;
lua_createtable (L, 3, 0);
/* High word, low word, order */
lua_pushnumber (L, (guint32)(v >> 32));
for (i = 0; i < tokens->len; i ++) {
tok = g_ptr_array_index (tokens, i);
v = 0;
- memcpy (&v, tok->data, MIN (sizeof (v), tok->datalen));
+ v = tok->data;
lua_createtable (L, 3, 0);
/* High word, low word, order */
lua_pushnumber (L, (guint32)(v >> 32));
for (i = 0; i < task->tokens->len; i ++) {
tok = g_ptr_array_index (task->tokens, i);
- rspamd_cryptobox_hash_update (&st, tok->data, tok->datalen);
+ rspamd_cryptobox_hash_update (&st, (guchar *)&tok->data,
+ sizeof (tok->data));
}
rspamd_cryptobox_hash_final (&st, out);
for (i = 0; i < task->tokens->len; i ++) {
tok = g_ptr_array_index (task->tokens, i);
- rspamd_cryptobox_hash_update (&st, tok->data, tok->datalen);
+ rspamd_cryptobox_hash_update (&st, (guchar *)&tok->data,
+ sizeof (tok->data));
}
rspamd_cryptobox_hash_final (&st, out);
gpointer bkcf;
};
-#define RSPAMD_MAX_TOKEN_LEN 8
typedef struct token_node_s {
- guchar data[RSPAMD_MAX_TOKEN_LEN];
+ guint64 data;
guint window_idx;
- guint datalen;
guint flags;
rspamd_stat_token_t *t1;
rspamd_stat_token_t *t2;
if (lua_type (L, -1) == LUA_TTABLE) {
lua_pushstring (L, "stat_metatokens");
- lua_gettable (L, -1);
-
- if (lua_type (L, -1) == LUA_TFUNCTION) {
- struct rspamd_task **ptask;
-
- ptask = lua_newuserdata (L, sizeof (*ptask));
- rspamd_lua_setclass (L, "rspamd{task}", -1);
- *ptask = task;
-
- if (lua_pcall (L, 1, 1, 0) != 0) {
- msg_err_task ("stat_metatokens failed: %s",
- lua_tostring (L, -1));
- lua_pop (L, 1);
- }
- else {
- /* Iterate over table of tables */
- for (lua_pushnil (L); lua_next (L, -2); lua_pop (L, 1)) {
- elt.flags |= RSPAMD_STAT_TOKEN_FLAG_LUA_META;
-
- if (lua_isnumber (L, -1)) {
- gdouble num = lua_tonumber (L, -1);
- guint8 *pnum = rspamd_mempool_alloc (task->task_pool,
- sizeof (num));
-
- msg_debug_task ("got metatoken number: %.2f", num);
- memcpy (pnum, &num, sizeof (num));
- elt.begin = (gchar *) pnum;
- elt.len = sizeof (num);
- g_array_append_val (ar, elt);
- }
- else if (lua_isstring (L, -1)) {
- const gchar *str;
- gsize tlen;
-
- str = lua_tolstring (L, -1, &tlen);
- guint8 *pstr = rspamd_mempool_alloc (task->task_pool,
- tlen);
- memcpy (pstr, str, tlen);
-
- msg_debug_task ("got metatoken string: %*s",
- (gint)tlen, str);
- elt.begin = (gchar *)pstr;
- elt.len = tlen;
- g_array_append_val (ar, elt);
+ lua_gettable (L, -2);
+
+ if (lua_type (L, -1) == LUA_TTABLE) {
+ lua_pushstring (L, "callback");
+ lua_gettable (L, -2);
+
+ if (lua_type (L, -1) == LUA_TFUNCTION) {
+ struct rspamd_task **ptask;
+
+ ptask = lua_newuserdata (L, sizeof (*ptask));
+ rspamd_lua_setclass (L, "rspamd{task}", -1);
+ *ptask = task;
+
+ if (lua_pcall (L, 1, 1, 0) != 0) {
+ msg_err_task ("stat_metatokens failed: %s",
+ lua_tostring (L, -1));
+ lua_pop (L, 1);
+ } else {
+ /* Iterate over table of tables */
+ for (lua_pushnil (L); lua_next (L, -2); lua_pop (L, 1)) {
+ elt.flags |= RSPAMD_STAT_TOKEN_FLAG_LUA_META;
+
+ if (lua_isnumber (L, -1)) {
+ gdouble num = lua_tonumber (L, -1);
+ guint8 *pnum = rspamd_mempool_alloc (
+ task->task_pool,
+ sizeof (num));
+
+ msg_debug_task ("got metatoken number: %.2f", num);
+ memcpy (pnum, &num, sizeof (num));
+ elt.begin = (gchar *) pnum;
+ elt.len = sizeof (num);
+ g_array_append_val (ar, elt);
+ } else if (lua_isstring (L, -1)) {
+ const gchar *str;
+ gsize tlen;
+
+ str = lua_tolstring (L, -1, &tlen);
+ guint8 *pstr = rspamd_mempool_alloc (
+ task->task_pool,
+ tlen);
+ memcpy (pstr, str, tlen);
+
+ msg_debug_task ("got metatoken string: %*s",
+ (gint) tlen, str);
+ elt.begin = (gchar *) pstr;
+ elt.len = tlen;
+ g_array_append_val (ar, elt);
+ }
}
}
}
#define ADD_TOKEN do {\
new_tok = rspamd_mempool_alloc0 (pool, token_size); \
- new_tok->datalen = sizeof (gint64); \
new_tok->flags = token_flags; \
new_tok->t1 = hashpipe[0].t; \
new_tok->t2 = hashpipe[i].t; \
((guint32)hashpipe[i].h) * primes[i << 1]; \
h2 = ((guint32)hashpipe[0].h) * primes[1] + \
((guint32)hashpipe[i].h) * primes[(i << 1) - 1]; \
- memcpy(new_tok->data, &h1, sizeof (h1)); \
- memcpy(new_tok->data + sizeof (h1), &h2, sizeof (h2)); \
+ memcpy((guchar *)&new_tok->data, &h1, sizeof (h1)); \
+ memcpy(((guchar *)&new_tok->data) + sizeof (h1), &h2, sizeof (h2)); \
} \
else { \
- cur = hashpipe[0].h * primes[0] + hashpipe[i].h * primes[i << 1]; \
- memcpy (new_tok->data, &cur, sizeof (cur)); \
+ new_tok->data = hashpipe[0].h * primes[0] + hashpipe[i].h * primes[i << 1]; \
} \
new_tok->window_idx = i + 1; \
g_ptr_array_add (result, new_tok); \
0, 0, 0, 0, 0
};
-gint
-token_node_compare_func (gconstpointer a, gconstpointer b)
-{
- const rspamd_token_t *aa = a, *bb = b;
-
- if (aa->datalen != bb->datalen) {
- return aa->datalen - bb->datalen;
- }
-
- return memcmp (aa->data, bb->data, aa->datalen);
-}
-
/* Get next word from specified f_str_t buf */
static gboolean
rspamd_tokenizer_get_word_compat (rspamd_stat_token_t * buf,