summaryrefslogtreecommitdiffstats
path: root/src/tokenizers
diff options
context:
space:
mode:
author: Vsevolod Stakhov <vsevolod@rambler-co.ru> 2010-12-24 18:57:55 +0300
committer: Vsevolod Stakhov <vsevolod@rambler-co.ru> 2010-12-24 18:57:55 +0300
commit: 6ae69bd5f741a04b9c38a7de83f6b6ed263e1c1f (patch)
tree: 29d90efa9d0457c2bf611ef5ce37533109a422b2 /src/tokenizers
parent: dff94626ac8a2edd24a8524cdbb00dd7dc2222d8 (diff)
download: rspamd-6ae69bd5f741a04b9c38a7de83f6b6ed263e1c1f.tar.gz
rspamd-6ae69bd5f741a04b9c38a7de83f6b6ed263e1c1f.zip
* Add Subject header to statistics
* Write log message about symbols that are removed when composite symbol is inserted
Diffstat (limited to 'src/tokenizers')
-rw-r--r-- src/tokenizers/tokenizers.c 38
-rw-r--r-- src/tokenizers/tokenizers.h 2
2 files changed, 40 insertions, 0 deletions
diff --git a/src/tokenizers/tokenizers.c b/src/tokenizers/tokenizers.c
index 9ca690e47..ab073a28c 100644
--- a/src/tokenizers/tokenizers.c
+++ b/src/tokenizers/tokenizers.c
@@ -222,6 +222,44 @@ tokenize_headers (memory_pool_t * pool, struct worker_task *task, GTree ** tree)
return TRUE;
}
+/*
+ * Build the token for a single subject string and insert it into the tree
+ * unless an equal token is already stored there.
+ *
+ * The token is allocated from task->task_pool; h1 is derived from the
+ * constant "Subject:" prefix and h2 from the subject text itself.
+ */
+static void
+insert_subject_token (struct worker_task *task, GTree *tree, const gchar *value)
+{
+	f_str_t subject, subject_name;
+	token_node_t *new;
+
+	subject_name.begin = "Subject:";
+	subject_name.len = sizeof ("Subject:") - 1;
+	/* f_str_t takes a non-const pointer; presumably fstrhash only reads it */
+	subject.begin = (gchar *)value;
+	subject.len = strlen (value);
+
+	new = memory_pool_alloc (task->task_pool, sizeof (token_node_t));
+	new->h1 = fstrhash (&subject_name) * primes[0];
+	new->h2 = fstrhash (&subject) * primes[1];
+	/* Do not replace a token that is already present in the tree */
+	if (g_tree_lookup (tree, new) == NULL) {
+		g_tree_insert (tree, new, new);
+	}
+}
+
+/*
+ * Make tokens for the subject of a task.
+ *
+ * task: task being processed; its task_pool, subject and message are used.
+ * tree: in/out pointer to the token tree. When *tree is NULL a new tree is
+ *       created and g_tree_destroy is registered as a destructor on
+ *       task->task_pool.
+ *
+ * A token is added for task->subject (when set) and another one for the
+ * subject returned by g_mime_message_get_subject (when available). Does
+ * nothing if task or tree is NULL.
+ */
+void
+tokenize_subject (struct worker_task *task, GTree ** tree)
+{
+	const gchar *sub;
+
+	if (task == NULL || tree == NULL) {
+		return;
+	}
+
+	if (*tree == NULL) {
+		*tree = g_tree_new (token_node_compare_func);
+		memory_pool_add_destructor (task->task_pool, (pool_destruct_func) g_tree_destroy, *tree);
+	}
+
+	/* Try to use pre-defined subject */
+	if (task->subject != NULL) {
+		insert_subject_token (task, *tree, task->subject);
+	}
+	/* Then the subject taken from the message; skip if there is no message */
+	if (task->message != NULL &&
+		(sub = g_mime_message_get_subject (task->message)) != NULL) {
+		insert_subject_token (task, *tree, sub);
+	}
+}
+
/*
* vi:ts=4
*/
diff --git a/src/tokenizers/tokenizers.h b/src/tokenizers/tokenizers.h
index 9a16e907c..21e454e6b 100644
--- a/src/tokenizers/tokenizers.h
+++ b/src/tokenizers/tokenizers.h
@@ -40,6 +40,8 @@ int osb_tokenize_text (struct tokenizer *tokenizer, memory_pool_t *pool, f_str_t
int tokenize_urls (memory_pool_t *pool, struct worker_task *task, GTree **cur);
/* Common tokenizer for headers */
int tokenize_headers (memory_pool_t *pool, struct worker_task *task, GTree **cur);
+/*
+ * Make tokens for a subject: a token is built for task->subject (when set)
+ * and for the subject of task->message (when one is returned), and each is
+ * inserted into *tree unless an equal token is already there. If *tree is
+ * NULL a new tree is created and its destruction is tied to task->task_pool.
+ */
+void tokenize_subject (struct worker_task *task, GTree ** tree);
/* Array of all defined tokenizers */
extern struct tokenizer tokenizers[];