1 files changed, 170 insertions, 171 deletions
diff --git a/src/libstat/classifiers/bayes.c b/src/libstat/classifiers/bayes.c
index 6709bb75a..513db9af9 100644
--- a/src/libstat/classifiers/bayes.c
+++ b/src/libstat/classifiers/bayes.c
@@ -21,25 +21,25 @@
 #include "stat_internal.h"
 #include "math.h"
 
-#define msg_err_bayes(...) rspamd_default_log_function (G_LOG_LEVEL_CRITICAL, \
-        "bayes", task->task_pool->tag.uid, \
-        RSPAMD_LOG_FUNC, \
-        __VA_ARGS__)
-#define msg_warn_bayes(...)   rspamd_default_log_function (G_LOG_LEVEL_WARNING, \
-        "bayes", task->task_pool->tag.uid, \
-        RSPAMD_LOG_FUNC, \
-        __VA_ARGS__)
-#define msg_info_bayes(...)   rspamd_default_log_function (G_LOG_LEVEL_INFO, \
-        "bayes", task->task_pool->tag.uid, \
-        RSPAMD_LOG_FUNC, \
-        __VA_ARGS__)
+#define msg_err_bayes(...) rspamd_default_log_function(G_LOG_LEVEL_CRITICAL,              \
+													   "bayes", task->task_pool->tag.uid, \
+													   RSPAMD_LOG_FUNC,                   \
+													   __VA_ARGS__)
+#define msg_warn_bayes(...) rspamd_default_log_function(G_LOG_LEVEL_WARNING,               \
+														"bayes", task->task_pool->tag.uid, \
+														RSPAMD_LOG_FUNC,                   \
+														__VA_ARGS__)
+#define msg_info_bayes(...) rspamd_default_log_function(G_LOG_LEVEL_INFO,                  \
+														"bayes", task->task_pool->tag.uid, \
+														RSPAMD_LOG_FUNC,                   \
+														__VA_ARGS__)
 
 INIT_LOG_MODULE_PUBLIC(bayes)
 
 static inline GQuark
-bayes_error_quark (void)
+bayes_error_quark(void)
 {
-	return g_quark_from_static_string ("bayes-error");
+	return g_quark_from_static_string("bayes-error");
 }
 
 /**
@@ -50,21 +50,21 @@ bayes_error_quark (void)
  * @return
  */
 static gdouble
-inv_chi_square (struct rspamd_task *task, gdouble value, gint freedom_deg)
+inv_chi_square(struct rspamd_task *task, gdouble value, gint freedom_deg)
 {
 	double prob, sum, m;
 	gint i;
 
 	errno = 0;
 	m = -value;
-	prob = exp (value);
+	prob = exp(value);
 
 	if (errno == ERANGE) {
 		/*
 		 * e^x where x is large *NEGATIVE* number is OK, so we have a very strong
 		 * confidence that inv-chi-square is close to zero
 		 */
-		msg_debug_bayes ("exp overflow");
+		msg_debug_bayes("exp overflow");
 
 		if (value < 0) {
 			return 0;
@@ -76,7 +76,7 @@ inv_chi_square (struct rspamd_task *task, gdouble value, gint freedom_deg)
 
 	sum = prob;
 
-	msg_debug_bayes ("m: %f, probability: %g", m, prob);
+	msg_debug_bayes("m: %f, probability: %g", m, prob);
 
 	/*
 	 * m is our confidence in class
@@ -85,12 +85,12 @@ inv_chi_square (struct rspamd_task *task, gdouble value, gint freedom_deg)
 	 * from 1.0 (no confidence) to 0.0 (full confidence)
 	 */
 	for (i = 1; i < freedom_deg; i++) {
-		prob *= m / (gdouble)i;
+		prob *= m / (gdouble) i;
 		sum += prob;
-		msg_debug_bayes ("i=%d, probability: %g, sum: %g", i, prob, sum);
+		msg_debug_bayes("i=%d, probability: %g, sum: %g", i, prob, sum);
 	}
 
-	return MIN (1.0, sum);
+	return MIN(1.0, sum);
 }
 
 struct bayes_task_closure {
@@ -107,15 +107,15 @@ struct bayes_task_closure {
  * Mathematically we use pow(complexity, complexity), where complexity is the
  * window index
  */
-static const double feature_weight[] = { 0, 3125, 256, 27, 1, 0, 0, 0 };
+static const double feature_weight[] = {0, 3125, 256, 27, 1, 0, 0, 0};
 
 #define PROB_COMBINE(prob, cnt, weight, assumed) (((weight) * (assumed) + (cnt) * (prob)) / ((weight) + (cnt)))
 /*
  * In this callback we calculate local probabilities for tokens
  */
 static void
-bayes_classify_token (struct rspamd_classifier *ctx,
-		rspamd_token_t *tok, struct bayes_task_closure *cl)
+bayes_classify_token(struct rspamd_classifier *ctx,
+					 rspamd_token_t *tok, struct bayes_task_closure *cl)
 {
 	guint i;
 	gint id;
@@ -136,15 +136,15 @@ bayes_classify_token (struct rspamd_classifier *ctx,
 #endif
 
 	if (tok->flags & RSPAMD_STAT_TOKEN_FLAG_META && cl->meta_skip_prob > 0) {
-		val = rspamd_random_double_fast ();
+		val = rspamd_random_double_fast();
 
 		if (val <= cl->meta_skip_prob) {
 			if (tok->t1 && tok->t2) {
-				msg_debug_bayes (
-						"token(meta) %uL <%*s:%*s> probabilistically skipped",
-						tok->data,
-						(int) tok->t1->original.len, tok->t1->original.begin,
-						(int) tok->t2->original.len, tok->t2->original.begin);
+				msg_debug_bayes(
+					"token(meta) %uL <%*s:%*s> probabilistically skipped",
+					tok->data,
+					(int) tok->t1->original.len, tok->t1->original.begin,
+					(int) tok->t2->original.len, tok->t2->original.begin);
 			}
 
 			return;
@@ -152,9 +152,9 @@ bayes_classify_token (struct rspamd_classifier *ctx,
 	}
 
 	for (i = 0; i < ctx->statfiles_ids->len; i++) {
-		id = g_array_index (ctx->statfiles_ids, gint, i);
-		st = g_ptr_array_index (ctx->ctx->statfiles, id);
-		g_assert (st != NULL);
+		id = g_array_index(ctx->statfiles_ids, gint, i);
+		st = g_ptr_array_index(ctx->ctx->statfiles, id);
+		g_assert(st != NULL);
 		val = tok->values[id];
 
 		if (val > 0) {
@@ -172,8 +172,8 @@ bayes_classify_token (struct rspamd_classifier *ctx,
 
 	/* Probability for this token */
 	if (total_count >= ctx->cfg->min_token_hits) {
-		spam_freq = ((double)spam_count / MAX (1., (double) ctx->spam_learns));
-		ham_freq = ((double)ham_count / MAX (1., (double)ctx->ham_learns));
+		spam_freq = ((double) spam_count / MAX(1., (double) ctx->spam_learns));
+		ham_freq = ((double) ham_count / MAX(1., (double) ctx->ham_learns));
 		spam_prob = spam_freq / (spam_freq + ham_freq);
 		ham_prob = ham_freq / (spam_freq + ham_freq);
 
@@ -182,93 +182,91 @@ bayes_classify_token (struct rspamd_classifier *ctx,
 		}
 		else {
 			fw = feature_weight[tok->window_idx %
-					G_N_ELEMENTS (feature_weight)];
+								G_N_ELEMENTS(feature_weight)];
 		}
 
 
 		w = (fw * total_count) / (1.0 + fw * total_count);
 
-		bayes_spam_prob = PROB_COMBINE (spam_prob, total_count, w, 0.5);
+		bayes_spam_prob = PROB_COMBINE(spam_prob, total_count, w, 0.5);
 
 		if ((bayes_spam_prob > 0.5 && bayes_spam_prob < 0.5 + ctx->cfg->min_prob_strength) ||
 			(bayes_spam_prob < 0.5 && bayes_spam_prob > 0.5 - ctx->cfg->min_prob_strength)) {
-			msg_debug_bayes (
-					"token %uL <%*s:%*s> skipped, probability not in range: %f",
-					tok->data,
-					(int) tok->t1->stemmed.len, tok->t1->stemmed.begin,
-					(int) tok->t2->stemmed.len, tok->t2->stemmed.begin,
-					bayes_spam_prob);
+			msg_debug_bayes(
+				"token %uL <%*s:%*s> skipped, probability not in range: %f",
+				tok->data,
+				(int) tok->t1->stemmed.len, tok->t1->stemmed.begin,
+				(int) tok->t2->stemmed.len, tok->t2->stemmed.begin,
+				bayes_spam_prob);
 
 			return;
 		}
 
-		bayes_ham_prob = PROB_COMBINE (ham_prob, total_count, w, 0.5);
+		bayes_ham_prob = PROB_COMBINE(ham_prob, total_count, w, 0.5);
 
-		cl->spam_prob += log (bayes_spam_prob);
-		cl->ham_prob += log (bayes_ham_prob);
-		cl->processed_tokens ++;
+		cl->spam_prob += log(bayes_spam_prob);
+		cl->ham_prob += log(bayes_ham_prob);
+		cl->processed_tokens++;
 
 		if (!(tok->flags & RSPAMD_STAT_TOKEN_FLAG_META)) {
-			cl->text_tokens ++;
+			cl->text_tokens++;
 		}
 		else {
 			token_type = "meta";
 		}
 
 		if (tok->t1 && tok->t2) {
-			msg_debug_bayes ("token(%s) %uL <%*s:%*s>: weight: %f, cf: %f, "
-					"total_count: %ud, "
-					"spam_count: %ud, ham_count: %ud,"
-					"spam_prob: %.3f, ham_prob: %.3f, "
-					"bayes_spam_prob: %.3f, bayes_ham_prob: %.3f, "
-					"current spam probability: %.3f, current ham probability: %.3f",
-					token_type,
-					tok->data,
-					(int) tok->t1->stemmed.len, tok->t1->stemmed.begin,
-					(int) tok->t2->stemmed.len, tok->t2->stemmed.begin,
-					fw, w, total_count, spam_count, ham_count,
-					spam_prob, ham_prob,
-					bayes_spam_prob, bayes_ham_prob,
-					cl->spam_prob, cl->ham_prob);
+			msg_debug_bayes("token(%s) %uL <%*s:%*s>: weight: %f, cf: %f, "
+							"total_count: %ud, "
+							"spam_count: %ud, ham_count: %ud,"
+							"spam_prob: %.3f, ham_prob: %.3f, "
+							"bayes_spam_prob: %.3f, bayes_ham_prob: %.3f, "
+							"current spam probability: %.3f, current ham probability: %.3f",
+							token_type,
+							tok->data,
+							(int) tok->t1->stemmed.len, tok->t1->stemmed.begin,
+							(int) tok->t2->stemmed.len, tok->t2->stemmed.begin,
+							fw, w, total_count, spam_count, ham_count,
+							spam_prob, ham_prob,
+							bayes_spam_prob, bayes_ham_prob,
+							cl->spam_prob, cl->ham_prob);
 		}
 		else {
-			msg_debug_bayes ("token(%s) %uL <?:?>: weight: %f, cf: %f, "
-					"total_count: %ud, "
-					"spam_count: %ud, ham_count: %ud,"
-					"spam_prob: %.3f, ham_prob: %.3f, "
-					"bayes_spam_prob: %.3f, bayes_ham_prob: %.3f, "
-					"current spam probability: %.3f, current ham probability: %.3f",
-					token_type,
-					tok->data,
-					fw, w, total_count, spam_count, ham_count,
-					spam_prob, ham_prob,
-					bayes_spam_prob, bayes_ham_prob,
-					cl->spam_prob, cl->ham_prob);
+			msg_debug_bayes("token(%s) %uL <?:?>: weight: %f, cf: %f, "
+							"total_count: %ud, "
+							"spam_count: %ud, ham_count: %ud,"
+							"spam_prob: %.3f, ham_prob: %.3f, "
+							"bayes_spam_prob: %.3f, bayes_ham_prob: %.3f, "
+							"current spam probability: %.3f, current ham probability: %.3f",
+							token_type,
+							tok->data,
+							fw, w, total_count, spam_count, ham_count,
+							spam_prob, ham_prob,
+							bayes_spam_prob, bayes_ham_prob,
+							cl->spam_prob, cl->ham_prob);
 		}
 	}
 }
 
 
-
 gboolean
-bayes_init (struct rspamd_config *cfg,
-			struct ev_loop *ev_base,
-			struct rspamd_classifier *cl)
+bayes_init(struct rspamd_config *cfg,
+		   struct ev_loop *ev_base,
+		   struct rspamd_classifier *cl)
 {
 	cl->cfg->flags |= RSPAMD_FLAG_CLASSIFIER_INTEGER;
 
 	return TRUE;
 }
 
-void
-bayes_fin (struct rspamd_classifier *cl)
+void bayes_fin(struct rspamd_classifier *cl)
 {
 }
 
 gboolean
-bayes_classify (struct rspamd_classifier * ctx,
-		GPtrArray *tokens,
-		struct rspamd_task *task)
+bayes_classify(struct rspamd_classifier *ctx,
+			   GPtrArray *tokens,
+			   struct rspamd_task *task)
 {
 	double final_prob, h, s, *pprob;
 	gchar sumbuf[32];
@@ -278,41 +276,41 @@ bayes_classify (struct rspamd_classifier * ctx,
 	guint i, text_tokens = 0;
 	gint id;
 
-	g_assert (ctx != NULL);
-	g_assert (tokens != NULL);
+	g_assert(ctx != NULL);
+	g_assert(tokens != NULL);
 
-	memset (&cl, 0, sizeof (cl));
+	memset(&cl, 0, sizeof(cl));
 	cl.task = task;
 
 	/* Check min learns */
 	if (ctx->cfg->min_learns > 0) {
 		if (ctx->ham_learns < ctx->cfg->min_learns) {
-			msg_info_task ("not classified as ham. The ham class needs more "
-					"training samples. Currently: %ul; minimum %ud required",
-					ctx->ham_learns, ctx->cfg->min_learns);
+			msg_info_task("not classified as ham. The ham class needs more "
+						  "training samples. Currently: %ul; minimum %ud required",
+						  ctx->ham_learns, ctx->cfg->min_learns);
 
 			return TRUE;
 		}
 		if (ctx->spam_learns < ctx->cfg->min_learns) {
-			msg_info_task ("not classified as spam. The spam class needs more "
-					"training samples. Currently: %ul; minimum %ud required",
-					ctx->spam_learns, ctx->cfg->min_learns);
+			msg_info_task("not classified as spam. The spam class needs more "
+						  "training samples. Currently: %ul; minimum %ud required",
+						  ctx->spam_learns, ctx->cfg->min_learns);
 
 			return TRUE;
 		}
 	}
 
-	for (i = 0; i < tokens->len; i ++) {
-		tok = g_ptr_array_index (tokens, i);
+	for (i = 0; i < tokens->len; i++) {
+		tok = g_ptr_array_index(tokens, i);
 		if (!(tok->flags & RSPAMD_STAT_TOKEN_FLAG_META)) {
-			text_tokens ++;
+			text_tokens++;
 		}
 	}
 
 	if (text_tokens == 0) {
-		msg_info_task ("skipped classification as there are no text tokens. "
-				"Total tokens: %ud",
-				tokens->len);
+		msg_info_task("skipped classification as there are no text tokens. "
+					  "Total tokens: %ud",
+					  tokens->len);
 
 		return TRUE;
 	}
@@ -327,42 +325,42 @@ bayes_classify (struct rspamd_classifier * ctx,
 		cl.meta_skip_prob = 1.0 - text_tokens / tokens->len;
 	}
 
-	for (i = 0; i < tokens->len; i ++) {
-		tok = g_ptr_array_index (tokens, i);
+	for (i = 0; i < tokens->len; i++) {
+		tok = g_ptr_array_index(tokens, i);
 
-		bayes_classify_token (ctx, tok, &cl);
+		bayes_classify_token(ctx, tok, &cl);
 	}
 
 	if (cl.processed_tokens == 0) {
-		msg_info_bayes ("no tokens found in bayes database "
-				  "(%ud total tokens, %ud text tokens), ignore stats",
-				tokens->len, text_tokens);
+		msg_info_bayes("no tokens found in bayes database "
+					   "(%ud total tokens, %ud text tokens), ignore stats",
+					   tokens->len, text_tokens);
 
 		return TRUE;
 	}
 
 	if (ctx->cfg->min_tokens > 0 &&
-		cl.text_tokens < (gint)(ctx->cfg->min_tokens * 0.1)) {
-		msg_info_bayes ("ignore bayes probability since we have "
-						"found too few text tokens: %uL (of %ud checked), "
-						"at least %d required",
-						cl.text_tokens,
-						text_tokens,
-						(gint)(ctx->cfg->min_tokens * 0.1));
+		cl.text_tokens < (gint) (ctx->cfg->min_tokens * 0.1)) {
+		msg_info_bayes("ignore bayes probability since we have "
+					   "found too few text tokens: %uL (of %ud checked), "
+					   "at least %d required",
+					   cl.text_tokens,
+					   text_tokens,
+					   (gint) (ctx->cfg->min_tokens * 0.1));
 
 		return TRUE;
 	}
 
 	if (cl.spam_prob > -300 && cl.ham_prob > -300) {
 		/* Fisher value is low enough to apply inv_chi_square */
-		h = 1 - inv_chi_square (task, cl.spam_prob, cl.processed_tokens);
-		s = 1 - inv_chi_square (task, cl.ham_prob, cl.processed_tokens);
+		h = 1 - inv_chi_square(task, cl.spam_prob, cl.processed_tokens);
+		s = 1 - inv_chi_square(task, cl.ham_prob, cl.processed_tokens);
 	}
 	else {
 		/* Use naive method */
 		if (cl.spam_prob < cl.ham_prob) {
 			h = (1.0 - exp(cl.spam_prob - cl.ham_prob)) /
-					(1.0 + exp(cl.spam_prob - cl.ham_prob));
+				(1.0 + exp(cl.spam_prob - cl.ham_prob));
 			s = 1.0 - h;
 		}
 		else {
@@ -372,51 +370,51 @@ bayes_classify (struct rspamd_classifier * ctx,
 		}
 	}
 
-	if (isfinite (s) && isfinite (h)) {
+	if (isfinite(s) && isfinite(h)) {
 		final_prob = (s + 1.0 - h) / 2.;
-		msg_debug_bayes (
-				"got ham probability %.2f -> %.2f and spam probability %.2f -> %.2f,"
-				" %L tokens processed of %ud total tokens;"
-				" %uL text tokens found of %ud text tokens)",
-				cl.ham_prob,
-				h,
-				cl.spam_prob,
-				s,
-				cl.processed_tokens,
-				tokens->len,
-				cl.text_tokens,
-				text_tokens);
+		msg_debug_bayes(
+			"got ham probability %.2f -> %.2f and spam probability %.2f -> %.2f,"
+			" %L tokens processed of %ud total tokens;"
+			" %uL text tokens found of %ud text tokens)",
+			cl.ham_prob,
+			h,
+			cl.spam_prob,
+			s,
+			cl.processed_tokens,
+			tokens->len,
+			cl.text_tokens,
+			text_tokens);
 	}
 	else {
 		/*
 		 * We have some overflow, hence we need to check which class
 		 * is NaN
 		 */
-		if (isfinite (h)) {
+		if (isfinite(h)) {
 			final_prob = 1.0;
-			msg_debug_bayes ("spam class is full: no"
-					" ham samples");
+			msg_debug_bayes("spam class is full: no"
+							" ham samples");
 		}
-		else if (isfinite (s)) {
+		else if (isfinite(s)) {
 			final_prob = 0.0;
-			msg_debug_bayes ("ham class is full: no"
-					" spam samples");
+			msg_debug_bayes("ham class is full: no"
+							" spam samples");
 		}
 		else {
 			final_prob = 0.5;
-			msg_warn_bayes ("spam and ham classes are both full");
+			msg_warn_bayes("spam and ham classes are both full");
 		}
 	}
 
-	pprob = rspamd_mempool_alloc (task->task_pool, sizeof (*pprob));
+	pprob = rspamd_mempool_alloc(task->task_pool, sizeof(*pprob));
 	*pprob = final_prob;
-	rspamd_mempool_set_variable (task->task_pool, "bayes_prob", pprob, NULL);
+	rspamd_mempool_set_variable(task->task_pool, "bayes_prob", pprob, NULL);
 
-	if (cl.processed_tokens > 0 && fabs (final_prob - 0.5) > 0.05) {
+	if (cl.processed_tokens > 0 && fabs(final_prob - 0.5) > 0.05) {
 		/* Now we can have exactly one HAM and exactly one SPAM statfiles per classifier */
 		for (i = 0; i < ctx->statfiles_ids->len; i++) {
-			id = g_array_index (ctx->statfiles_ids, gint, i);
-			st = g_ptr_array_index (ctx->ctx->statfiles, id);
+			id = g_array_index(ctx->statfiles_ids, gint, i);
+			st = g_ptr_array_index(ctx->ctx->statfiles, id);
 
 			if (final_prob > 0.5 && st->stcf->is_spam) {
 				break;
@@ -435,14 +433,15 @@ bayes_classify (struct rspamd_classifier * ctx,
 		 * Bayes p is from 0.5 to 1.0, but confidence is from 0 to 1, so
 		 * we need to rescale it to display correctly
 		 */
-		rspamd_snprintf (sumbuf, sizeof (sumbuf), "%.2f%%",
-				(final_prob - 0.5) * 200.);
-		final_prob = rspamd_normalize_probability (final_prob, 0.5);
-		g_assert (st != NULL);
+		rspamd_snprintf(sumbuf, sizeof(sumbuf), "%.2f%%",
+						(final_prob - 0.5) * 200.);
+		final_prob = rspamd_normalize_probability(final_prob, 0.5);
+		g_assert(st != NULL);
 
 		if (final_prob > 1 || final_prob < 0) {
-			msg_err_bayes ("internal error: probability %f is outside of the "
-				  "allowed range [0..1]", final_prob);
+			msg_err_bayes("internal error: probability %f is outside of the "
+						  "allowed range [0..1]",
+						  final_prob);
 
 			if (final_prob > 1) {
 				final_prob = 1.0;
@@ -452,22 +451,22 @@ bayes_classify (struct rspamd_classifier * ctx,
 			}
 		}
 
-		rspamd_task_insert_result (task,
-				st->stcf->symbol,
-				final_prob,
-				sumbuf);
+		rspamd_task_insert_result(task,
+								  st->stcf->symbol,
+								  final_prob,
+								  sumbuf);
 	}
 
 	return TRUE;
 }
 
 gboolean
-bayes_learn_spam (struct rspamd_classifier * ctx,
-		GPtrArray *tokens,
-		struct rspamd_task *task,
-		gboolean is_spam,
-		gboolean unlearn,
-		GError **err)
+bayes_learn_spam(struct rspamd_classifier *ctx,
+				 GPtrArray *tokens,
+				 struct rspamd_task *task,
+				 gboolean is_spam,
+				 gboolean unlearn,
+				 GError **err)
 {
 	guint i, j, total_cnt, spam_cnt, ham_cnt;
 	gint id;
@@ -475,8 +474,8 @@ bayes_learn_spam (struct rspamd_classifier * ctx,
 	rspamd_token_t *tok;
 	gboolean incrementing;
 
-	g_assert (ctx != NULL);
-	g_assert (tokens != NULL);
+	g_assert(ctx != NULL);
+	g_assert(tokens != NULL);
 
 	incrementing = ctx->cfg->flags & RSPAMD_FLAG_CLASSIFIER_INCREMENTING_BACKEND;
 
@@ -484,12 +483,12 @@ bayes_learn_spam (struct rspamd_classifier * ctx,
 		total_cnt = 0;
 		spam_cnt = 0;
 		ham_cnt = 0;
-		tok = g_ptr_array_index (tokens, i);
+		tok = g_ptr_array_index(tokens, i);
 
 		for (j = 0; j < ctx->statfiles_ids->len; j++) {
-			id = g_array_index (ctx->statfiles_ids, gint, j);
-			st = g_ptr_array_index (ctx->ctx->statfiles, id);
-			g_assert (st != NULL);
+			id = g_array_index(ctx->statfiles_ids, gint, j);
+			st = g_ptr_array_index(ctx->ctx->statfiles, id);
+			g_assert(st != NULL);
 
 			if (!!st->stcf->is_spam == !!is_spam) {
 				if (incrementing) {
@@ -533,18 +532,18 @@ bayes_learn_spam (struct rspamd_classifier * ctx,
 		}
 
 		if (tok->t1 && tok->t2) {
-			msg_debug_bayes ("token %uL <%*s:%*s>: window: %d, total_count: %d, "
-					"spam_count: %d, ham_count: %d",
-					tok->data,
-					(int) tok->t1->stemmed.len, tok->t1->stemmed.begin,
-					(int) tok->t2->stemmed.len, tok->t2->stemmed.begin,
-					tok->window_idx, total_cnt, spam_cnt, ham_cnt);
+			msg_debug_bayes("token %uL <%*s:%*s>: window: %d, total_count: %d, "
+							"spam_count: %d, ham_count: %d",
+							tok->data,
+							(int) tok->t1->stemmed.len, tok->t1->stemmed.begin,
+							(int) tok->t2->stemmed.len, tok->t2->stemmed.begin,
+							tok->window_idx, total_cnt, spam_cnt, ham_cnt);
 		}
 		else {
-			msg_debug_bayes ("token %uL <?:?>: window: %d, total_count: %d, "
-					"spam_count: %d, ham_count: %d",
-					tok->data,
-					tok->window_idx, total_cnt, spam_cnt, ham_cnt);
+			msg_debug_bayes("token %uL <?:?>: window: %d, total_count: %d, "
+							"spam_count: %d, ham_count: %d",
+							tok->data,
+							tok->window_idx, total_cnt, spam_cnt, ham_cnt);
 		}
 	}