ham_freq = ((double)ham_count / MAX (1., (double)ctx->ham_learns));
spam_prob = spam_freq / (spam_freq + ham_freq);
ham_prob = ham_freq / (spam_freq + ham_freq);
- fw = feature_weight[tok->window_idx % G_N_ELEMENTS (feature_weight)];
+
+ if (tok->flags & RSPAMD_STAT_TOKEN_FLAG_UNIGRAM) {
+ fw = 1.0;
+ }
+ else {
+ fw = feature_weight[tok->window_idx %
+ G_N_ELEMENTS (feature_weight)];
+ }
+
norm_sum = (spam_freq + ham_freq) * (spam_freq + ham_freq);
norm_sub = (spam_freq - ham_freq) * (spam_freq - ham_freq);
#define RSPAMD_STAT_TOKEN_FLAG_LUA_META (1 << 2)
#define RSPAMD_STAT_TOKEN_FLAG_EXCEPTION (1 << 3)
#define RSPAMD_STAT_TOKEN_FLAG_SUBJECT (1 << 4)
+#define RSPAMD_STAT_TOKEN_FLAG_UNIGRAM (1 << 5) /* token stored alone (t1 == t2, window_idx 0); weighted with fw = 1.0 */
typedef struct rspamd_stat_token_s {
const gchar *begin;
}
}
+ if (token_flags & RSPAMD_STAT_TOKEN_FLAG_UNIGRAM) {
+ new_tok = rspamd_mempool_alloc0 (pool, token_size);
+ new_tok->flags = token_flags;
+ new_tok->t1 = token;
+ new_tok->t2 = token;
+ new_tok->data = cur;
+ new_tok->window_idx = 0;
+ g_ptr_array_add (result, new_tok);
+
+ continue;
+ }
+
#define ADD_TOKEN do {\
new_tok = rspamd_mempool_alloc0 (pool, token_size); \
new_tok->flags = token_flags; \