summaryrefslogtreecommitdiffstats
path: root/src/classifiers/winnow.c
blob: 552f054b2ac60b1d44f7f4ef96c6728eaaf2ad1f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
/*
 * Winnow classifier
 */

#include <sys/types.h>
#include "classifiers.h"

/*
 * Multiplicative update factors used by learn_callback: a token's stored
 * value is multiplied by WINNOW_PROMOTION when the learned sample is marked
 * in-class (in_class != 0) and by WINNOW_DEMOTION otherwise.
 */
#define WINNOW_PROMOTION 1.23
#define WINNOW_DEMOTION 0.83

/*
 * State shared between winnow_classify/winnow_learn and the per-token
 * callbacks they hand to g_tree_foreach.
 */
struct winnow_callback_data {
	/* Pool and statfile name forwarded to every statfile_pool_* call */
	statfile_pool_t *pool;
	char *filename;
	/* Running total of block values; only classify_callback adds to it */
	double sum;
	/* Number of tokens visited; incremented by both callbacks */
	int count;
	/* Non-zero selects promotion, zero selects demotion; read only by learn_callback */
	int in_class;
	/* Timestamp taken once per operation with time (NULL) and passed to block get/set calls */
	time_t now;
};

/*
 * Tree traversal callback used while classifying.
 *
 * key   - token_node_t whose h1/h2 hashes identify the block to look up
 * value - unused
 * data  - struct winnow_callback_data accumulating the score
 *
 * Adds the token's stored value to the running sum and bumps the token
 * counter. Always returns FALSE (per GLib's traverse-function convention
 * this keeps the traversal going).
 */
static gboolean
classify_callback (gpointer key, gpointer value, gpointer data)
{
	struct winnow_callback_data *state = data;
	token_node_t *token = key;
	float weight;

	weight = statfile_pool_get_block (state->pool, state->filename,
			token->h1, token->h2, state->now);

	/* Blocks that are missing (value below the epsilon) count as 1 */
	state->sum += (weight < 0.00001) ? 1 : weight;
	state->count ++;

	return FALSE;
}

/*
 * Tree traversal callback used while learning.
 *
 * key   - token_node_t whose h1/h2 hashes identify the block to update
 * value - unused
 * data  - struct winnow_callback_data; in_class picks the update factor
 *
 * Reads the token's current value, multiplies it by the promotion or
 * demotion factor and writes it back. Always returns FALSE (per GLib's
 * traverse-function convention this keeps the traversal going).
 */
static gboolean
learn_callback (gpointer key, gpointer value, gpointer data)
{
	struct winnow_callback_data *state = data;
	token_node_t *token = key;
	float factor, current;

	if (state->in_class) {
		factor = WINNOW_PROMOTION;
	}
	else {
		factor = WINNOW_DEMOTION;
	}

	current = statfile_pool_get_block (state->pool, state->filename,
			token->h1, token->h2, state->now);

	/*
	 * A block that was not found is treated as holding 1, so its new
	 * value is just the factor; otherwise scale the stored value.
	 */
	statfile_pool_set_block (state->pool, state->filename,
			token->h1, token->h2, state->now,
			(current < 0.00001) ? factor : current * factor);

	state->count ++;

	return FALSE;
}


/*
 * Score a set of tokens against a statfile.
 *
 * pool     - statfile pool used for all block lookups
 * statfile - name of the statfile to consult; opened here if it is not
 *            already open in the pool
 * input    - tree whose keys are token_node_t entries
 *
 * Returns the mean of the per-token values (missing tokens count as 1).
 * Returns 0 when the statfile cannot be opened or when the tree holds no
 * tokens, so callers never receive NaN from a 0/0 division.
 */
double
winnow_classify (statfile_pool_t *pool, char *statfile, GTree *input)
{
	struct winnow_callback_data data;

	data.pool = pool;
	data.filename = statfile;
	data.sum = 0;
	data.count = 0;
	/* Not read while classifying; set so no field is left indeterminate */
	data.in_class = 0;
	data.now = time (NULL);

	if (!statfile_pool_is_open (pool, statfile)) {
		if (statfile_pool_open (pool, statfile) == -1) {
			return 0;
		}
	}

	g_tree_foreach (input, classify_callback, &data);

	/*
	 * An empty tree visits no nodes, leaving count at 0; report "no score"
	 * the same way as the open-failure path instead of dividing by zero.
	 */
	if (data.count == 0) {
		return 0;
	}

	return data.sum / data.count;
}

/*
 * Train a statfile on a set of tokens.
 *
 * pool     - statfile pool used for all block reads and writes
 * statfile - name of the statfile to update; opened here if it is not
 *            already open in the pool
 * input    - tree whose keys are token_node_t entries
 * in_class - non-zero to promote the tokens' values, zero to demote them
 *
 * Does nothing if the statfile cannot be opened. The statfile is locked
 * for the duration of the update pass and unlocked afterwards.
 */
void
winnow_learn (statfile_pool_t *pool, char *statfile, GTree *input, int in_class)
{
	struct winnow_callback_data ctx;

	ctx.in_class = in_class;
	ctx.count = 0;
	ctx.sum = 0;
	ctx.filename = statfile;
	ctx.pool = pool;
	ctx.now = time (NULL);

	/* Open on demand; bail out silently when that fails */
	if (!statfile_pool_is_open (pool, statfile)
			&& statfile_pool_open (pool, statfile) == -1) {
		return;
	}

	statfile_pool_lock_file (pool, statfile);
	g_tree_foreach (input, learn_callback, &ctx);
	statfile_pool_unlock_file (pool, statfile);
}