guint64 total_hits;
double local_probability;
double post_probability;
- guint value;
+ guint64 value;
struct statfile *st;
stat_file_t *file;
};
cur->post_probability = G_MINDOUBLE * 100;
}
if (cd->ctx->debug) {
- msg_info ("token: %s, statfile: %s, probability: %.4f, post_probability: %.4f",
+ msg_info ("token: %s, statfile: %s, probability: %uL, post_probability: %.4f",
node->extra, cur->st->symbol, cur->value, cur->post_probability);
}
}
c.len = strlen (cur->data);
if (c.len > 0) {
c.begin = cur->data;
- if (!cl->tokenizer->tokenize_func (cl->tokenizer, task->task_pool, &c, &tokens, TRUE)) {
+ if (!cl->tokenizer->tokenize_func (cl->tokenizer, task->task_pool, &c, &tokens, TRUE, FALSE, NULL)) {
msg_info ("cannot tokenize input");
return;
}
c.begin = text_part->content->data;
c.len = text_part->content->len;
/* Tree would be freed at task pool freeing */
- if (!cl->tokenizer->tokenize_func (cl->tokenizer, task->task_pool, &c, &tokens, TRUE)) {
+ if (!cl->tokenizer->tokenize_func (cl->tokenizer, task->task_pool, &c, &tokens, TRUE,
+ text_part->is_utf, text_part->urls_offset)) {
msg_info ("cannot tokenize input");
return;
}