#include "cfg_file.h"
#include "main.h"
+#include "classifiers/classifiers.h"
+#include "tokenizers/tokenizers.h"
#define YYDEBUG 1
%token REQUIRED_SCORE FUNCTION FRACT COMPOSITES CONTROL PASSWORD
%token LOGGING LOG_TYPE LOG_TYPE_CONSOLE LOG_TYPE_SYSLOG LOG_TYPE_FILE
%token LOG_LEVEL LOG_LEVEL_DEBUG LOG_LEVEL_INFO LOG_LEVEL_WARNING LOG_LEVEL_ERROR LOG_FACILITY LOG_FILENAME
-%token STATFILE ALIAS PATTERN WEIGHT STATFILE_POOL_SIZE SIZE
+%token STATFILE ALIAS PATTERN WEIGHT STATFILE_POOL_SIZE SIZE TOKENIZER CLASSIFIER
%type <string> STRING
%type <string> VARIABLE
yyerror ("yyparse: not enough arguments in statfile definition");
YYERROR;
}
+ if (cur_statfile->metric == NULL) {
+ cur_statfile->metric = memory_pool_strdup (cfg->cfg_pool, "default");
+ }
+ if (cur_statfile->classifier == NULL) {
+ cur_statfile->classifier = get_classifier ("winnow");
+ }
+ if (cur_statfile->tokenizer == NULL) {
+ cur_statfile->tokenizer = get_tokenizer ("osb-text");
+ }
g_hash_table_insert (cfg->statfiles, cur_statfile->alias, cur_statfile);
cur_statfile = NULL;
}
| statfilepattern
| statfileweight
| statfilesize
+ | statfilemetric
+ | statfiletokenizer
+ | statfileclassifier
;
statfilealias:
}
;
+statfilemetric:
+	METRIC EQSIGN QUOTEDSTRING {
+		/*
+		 * metric = "name": remember the metric name for the statfile
+		 * that is currently being parsed.
+		 */
+		if (!cur_statfile) {
+			/* No statfile object yet: create a zeroed one in the config pool */
+			cur_statfile = memory_pool_alloc0 (cfg->cfg_pool, sizeof (struct statfile));
+		}
+		/* Store a copy of the name allocated from the config pool */
+		cur_statfile->metric = memory_pool_strdup (cfg->cfg_pool, $3);
+	}
+	;
+
+statfiletokenizer:
+	TOKENIZER EQSIGN QUOTEDSTRING {
+		/*
+		 * tokenizer = "name": resolve the tokenizer by name and attach it
+		 * to the statfile that is currently being parsed.
+		 */
+		if (!cur_statfile) {
+			/* No statfile object yet: create a zeroed one in the config pool */
+			cur_statfile = memory_pool_alloc0 (cfg->cfg_pool, sizeof (struct statfile));
+		}
+		cur_statfile->tokenizer = get_tokenizer ($3);
+		if (!cur_statfile->tokenizer) {
+			/* Lookup returned NULL: report the name and fail the parse */
+			yyerror ("yyparse: unknown tokenizer %s", $3);
+			YYERROR;
+		}
+	}
+	;
+
+statfileclassifier:
+	CLASSIFIER EQSIGN QUOTEDSTRING {
+		/*
+		 * classifier = "name": resolve the classifier by name and attach it
+		 * to the statfile that is currently being parsed.
+		 */
+		if (!cur_statfile) {
+			/* No statfile object yet: create a zeroed one in the config pool */
+			cur_statfile = memory_pool_alloc0 (cfg->cfg_pool, sizeof (struct statfile));
+		}
+		cur_statfile->classifier = get_classifier ($3);
+		if (!cur_statfile->classifier) {
+			/* Lookup returned NULL: report the name and fail the parse */
+			yyerror ("yyparse: unknown classifier %s", $3);
+			YYERROR;
+		}
+	}
+	;
statfile_pool_size:
STATFILE_POOL_SIZE EQSIGN SIZELIMIT {
session->learn_rcpt = NULL;
session->learn_from = NULL;
session->learn_filename = NULL;
- session->learn_tokenizer = get_tokenizer ("osb-text");
- session->learn_classifier = get_classifier ("winnow");
+ session->learn_tokenizer = statfile->tokenizer;
+ session->learn_classifier = statfile->classifier;
/* By default learn positive */
session->in_class = 1;
/* Get all arguments */
}
session->learn_from = memory_pool_strdup (session->session_pool, arg);
break;
- case 't':
- arg = *(cmd_args + 1);
- if (!arg || *arg == '\0' || (session->learn_tokenizer = get_tokenizer (arg)) == NULL) {
- r = snprintf (out_buf, sizeof (out_buf), "tokenizer is not defined" CRLF, arg);
- bufferevent_write (session->bev, out_buf, r);
- return;
- }
- break;
- case 'c':
- arg = *(cmd_args + 1);
- if (!arg || *arg == '\0' || (session->learn_classifier = get_classifier (arg)) == NULL) {
- r = snprintf (out_buf, sizeof (out_buf), "classifier is not defined" CRLF, arg);
- bufferevent_write (session->bev, out_buf, r);
- return;
- }
- break;
case 'n':
session->in_class = 0;
break;
#include "main.h"
#include "cfg_file.h"
#include "perl.h"
+#include "util.h"
+#include "classifiers/classifiers.h"
+#include "tokenizers/tokenizers.h"
void
insert_result (struct worker_task *task, const char *metric_name, const char *symbol, u_char flag)
g_hash_table_foreach (task->cfg->composite_symbols, composites_foreach_callback, cd);
}
-void make_composites (struct worker_task *task)
+void
+make_composites (struct worker_task *task)
{
/*
 * Walk every entry of task->results and invoke composites_metric_callback
 * on it, handing the task itself to the callback as its user argument.
 */
g_hash_table_foreach (task->results, composites_metric_callback, task);
}
+struct statfile_callback_data { /* argument block handed to statfiles_callback by process_statfiles */
+ GHashTable *metrics; /* keyed by st->metric; value is a double * accumulating weight * st->weight */
+ GHashTable *tokens; /* keyed by st->tokenizer pointer; value is the GTree * its tokenize_func returned */
+ struct worker_task *task; /* task whose message is being classified */
+};
+
+/*
+ * g_hash_table_foreach callback invoked for each configured statfile.
+ *
+ * key   - unused
+ * value - struct statfile describing the statfile
+ * arg   - struct statfile_callback_data shared by all invocations
+ *
+ * Resolves the statfile name for the task, tokenizes the message (reusing
+ * a tree already produced by the same tokenizer), classifies it and adds
+ * the weighted result to the accumulator kept for the statfile's metric.
+ */
+static void
+statfiles_callback (gpointer key, gpointer value, void *arg)
+{
+	struct statfile *st = (struct statfile *)value;
+	struct statfile_callback_data *cbd = (struct statfile_callback_data *)arg;
+	struct worker_task *task = cbd->task;
+	GTree *tree;
+	char *path, *rcpt;
+	double weight, *acc;
+
+	/*
+	 * The recipient is substituted only when there is exactly one of them;
+	 * otherwise an empty string is used.
+	 * XXX: handle multiple recipients correctly
+	 */
+	rcpt = (g_list_length (task->rcpt) == 1) ? (char *)task->rcpt->data : "";
+	path = resolve_stat_filename (task->task_pool, st->pattern, task->from, rcpt);
+
+	if (statfile_pool_open (task->worker->srv->statfile_pool, path) == -1) {
+		return;
+	}
+
+	/* Tokenize once per tokenizer and cache the tree for other statfiles */
+	tree = g_hash_table_lookup (cbd->tokens, st->tokenizer);
+	if (tree == NULL) {
+		/* Tree would be freed at task pool freeing */
+		tree = st->tokenizer->tokenize_func (st->tokenizer, task->task_pool, task->msg->buf);
+		if (tree == NULL) {
+			msg_info ("statfiles_callback: cannot tokenize input");
+			return;
+		}
+		g_hash_table_insert (cbd->tokens, st->tokenizer, tree);
+	}
+
+	weight = st->classifier->classify_func (task->worker->srv->statfile_pool, path, tree);
+
+	/* Only results above the threshold contribute to the metric */
+	if (!(weight > 0.000001)) {
+		return;
+	}
+
+	acc = g_hash_table_lookup (cbd->metrics, st->metric);
+	if (acc != NULL) {
+		/* Metric already has an accumulator: add this statfile's share */
+		*acc += weight * st->weight;
+		return;
+	}
+
+	/* First result for this metric: create the accumulator in the task pool */
+	acc = memory_pool_alloc (task->task_pool, sizeof (double));
+	*acc = weight * st->weight;
+	g_hash_table_insert (cbd->metrics, st->metric, acc);
+}
+
+/*
+ * g_hash_table_foreach callback over the per-metric weights collected by
+ * statfiles_callback.
+ *
+ * key   - metric name
+ * value - pointer to the double holding the summed weight
+ * arg   - struct worker_task being processed
+ *
+ * Adds the weight to the task's result for that metric, creating the
+ * result entry first when the task has none. Metrics that are not present
+ * in the configuration are ignored.
+ */
+static void
+statfiles_results_callback (gpointer key, gpointer value, void *arg)
+{
+	struct worker_task *task = (struct worker_task *)arg;
+	char *name = (char *)key;
+	double score = *(double *)value;
+	struct metric *metric;
+	struct metric_result *res;
+
+	metric = g_hash_table_lookup (task->worker->srv->cfg->metrics, name);
+	if (metric == NULL) {
+		return;
+	}
+
+	res = g_hash_table_lookup (task->results, name);
+	if (res != NULL) {
+		res->score += score;
+	}
+	else {
+		/* Create new metric chain */
+		res = memory_pool_alloc (task->task_pool, sizeof (struct metric_result));
+		res->symbols = g_hash_table_new (g_str_hash, g_str_equal);
+		/* Register the symbols table for destruction with the task pool */
+		memory_pool_add_destructor (task->task_pool, (pool_destruct_func)g_hash_table_destroy, res->symbols);
+		res->metric = metric;
+		res->score = score;
+		g_hash_table_insert (task->results, key, res);
+	}
+
+	/* Record the metric name in the symbols table of the result */
+	g_hash_table_insert (res->symbols, key, GSIZE_TO_POINTER (1));
+}
+
+
+/*
+ * Run all statfiles from task->cfg->statfiles against the task and merge
+ * the per-metric weights they produce into the task's results.
+ */
+void
+process_statfiles (struct worker_task *task)
+{
+	struct statfile_callback_data ctx;
+
+	/* Scratch tables that exist only for the duration of this call */
+	ctx.tokens = g_hash_table_new (g_direct_hash, g_direct_equal);
+	ctx.metrics = g_hash_table_new (g_str_hash, g_str_equal);
+	ctx.task = task;
+
+	/* First pass: classify with every statfile, accumulating per metric */
+	g_hash_table_foreach (task->cfg->statfiles, statfiles_callback, &ctx);
+	/* Second pass: push the accumulated weights into the task results */
+	g_hash_table_foreach (ctx.metrics, statfiles_results_callback, task);
+
+	g_hash_table_destroy (ctx.tokens);
+	g_hash_table_destroy (ctx.metrics);
+}
/*
* vi:ts=4