@@ -99,7 +99,7 @@ struct rspamd_stat_backend { | |||
gpointer ctx); \ | |||
ucl_object_t * rspamd_##name##_get_stat (gpointer runtime, \ | |||
gpointer ctx); \ | |||
void rspamd_##name##_load_tokenizer_config (gpointer runtime, \ | |||
gpointer rspamd_##name##_load_tokenizer_config (gpointer runtime, \ | |||
gsize *len); \ | |||
void rspamd_##name##_close (gpointer ctx) | |||
@@ -120,7 +120,8 @@ rspamd_mmaped_file_t * rspamd_mmaped_file_is_open ( | |||
rspamd_mmaped_file_t * rspamd_mmaped_file_open (rspamd_mmaped_file_ctx * pool, | |||
const gchar *filename, size_t size, struct rspamd_statfile_config *stcf); | |||
gint rspamd_mmaped_file_create (rspamd_mmaped_file_ctx * pool, | |||
const gchar *filename, size_t size, struct rspamd_statfile_config *stcf); | |||
const gchar *filename, size_t size, struct rspamd_statfile_config *stcf, | |||
rspamd_mempool_t *mempool); | |||
double | |||
rspamd_mmaped_file_get_block (rspamd_mmaped_file_ctx * pool, | |||
@@ -452,7 +453,7 @@ rspamd_mmaped_file_reindex (rspamd_mmaped_file_ctx * pool, | |||
} | |||
/* Now create new file with required size */ | |||
if (rspamd_mmaped_file_create (pool, filename, size, stcf) != 0) { | |||
if (rspamd_mmaped_file_create (pool, filename, size, stcf, pool->pool) != 0) { | |||
msg_err ("cannot create new file"); | |||
g_free (backup); | |||
return NULL; | |||
@@ -543,8 +544,6 @@ rspamd_mmaped_file_open (rspamd_mmaped_file_ctx * pool, | |||
{ | |||
struct stat st; | |||
rspamd_mmaped_file_t *new_file; | |||
struct rspamd_stat_tokenizer *tokenizer; | |||
struct stat_file_header *header; | |||
if ((new_file = rspamd_mmaped_file_is_open (pool, stcf)) != NULL) { | |||
return new_file; | |||
@@ -615,22 +614,7 @@ rspamd_mmaped_file_open (rspamd_mmaped_file_ctx * pool, | |||
rspamd_mmaped_file_preload (new_file); | |||
/* Check tokenizer compatibility */ | |||
header = new_file->map; | |||
g_assert (stcf->clcf != NULL); | |||
g_assert (stcf->clcf->tokenizer != NULL); | |||
tokenizer = rspamd_stat_get_tokenizer (stcf->clcf->tokenizer->name); | |||
g_assert (tokenizer != NULL); | |||
if (!tokenizer->compatible_config (stcf->clcf->tokenizer, header->unused, | |||
header->tokenizer_conf_len)) { | |||
msg_err ("mmapped statfile %s is not compatible with the tokenizer " | |||
"defined", new_file->filename); | |||
munmap (new_file->map, st.st_size); | |||
g_slice_free1 (sizeof (*new_file), new_file); | |||
return NULL; | |||
} | |||
g_hash_table_insert (pool->files, stcf, new_file); | |||
@@ -664,7 +648,7 @@ rspamd_mmaped_file_close_file (rspamd_mmaped_file_ctx * pool, | |||
gint | |||
rspamd_mmaped_file_create (rspamd_mmaped_file_ctx * pool, const gchar *filename, | |||
size_t size, struct rspamd_statfile_config *stcf) | |||
size_t size, struct rspamd_statfile_config *stcf, rspamd_mempool_t *mempool) | |||
{ | |||
struct stat_file_header header = { | |||
.magic = {'r', 's', 'd'}, | |||
@@ -722,7 +706,7 @@ rspamd_mmaped_file_create (rspamd_mmaped_file_ctx * pool, const gchar *filename, | |||
g_assert (stcf->clcf->tokenizer != NULL); | |||
tokenizer = rspamd_stat_get_tokenizer (stcf->clcf->tokenizer->name); | |||
g_assert (tokenizer != NULL); | |||
tok_conf = tokenizer->get_config (stcf->clcf->tokenizer, &tok_conf_len); | |||
tok_conf = tokenizer->get_config (mempool, stcf->clcf->tokenizer, &tok_conf_len); | |||
header.tokenizer_conf_len = tok_conf_len; | |||
g_assert (tok_conf_len < sizeof (header.unused) - sizeof (guint64)); | |||
memcpy (header.unused, tok_conf, tok_conf_len); | |||
@@ -819,20 +803,25 @@ rspamd_mmaped_file_init (struct rspamd_stat_ctx *ctx, struct rspamd_config *cfg) | |||
clf = cur->data; | |||
curst = clf->statfiles; | |||
while (curst) { | |||
stf = curst->data; | |||
if (clf->backend == NULL) { | |||
/* | |||
* By default, all statfiles are treated as mmaped files | |||
*/ | |||
if (stf->backend == NULL || | |||
strcmp (stf->backend, MMAPED_BACKEND_TYPE) == 0) { | |||
clf->backend = MMAPED_BACKEND_TYPE; | |||
} | |||
if (strcmp (clf->backend, MMAPED_BACKEND_TYPE) == 0) { | |||
while (curst) { | |||
stf = curst->data; | |||
/* | |||
* Check configuration sanity | |||
*/ | |||
filenameo = ucl_object_find_key (stf->opts, "filename"); | |||
if (filenameo == NULL || ucl_object_type (filenameo) != UCL_STRING) { | |||
filenameo = ucl_object_find_key (stf->opts, "path"); | |||
if (filenameo == NULL || ucl_object_type (filenameo) != UCL_STRING) { | |||
msg_err ("statfile %s has no filename defined", stf->symbol); | |||
curst = curst->next; | |||
@@ -843,6 +832,7 @@ rspamd_mmaped_file_init (struct rspamd_stat_ctx *ctx, struct rspamd_config *cfg) | |||
filename = ucl_object_tostring (filenameo); | |||
sizeo = ucl_object_find_key (stf->opts, "size"); | |||
if (sizeo == NULL || ucl_object_type (sizeo) != UCL_INT) { | |||
msg_err ("statfile %s has no size defined", stf->symbol); | |||
curst = curst->next; | |||
@@ -854,9 +844,9 @@ rspamd_mmaped_file_init (struct rspamd_stat_ctx *ctx, struct rspamd_config *cfg) | |||
rspamd_mmaped_file_open (new, filename, size, stf); | |||
ctx->statfiles ++; | |||
} | |||
curst = curst->next; | |||
curst = curst->next; | |||
} | |||
} | |||
cur = g_list_next (cur); | |||
@@ -927,7 +917,7 @@ rspamd_mmaped_file_runtime (struct rspamd_task *task, | |||
size = ucl_object_toint (sizeo); | |||
if (learn) { | |||
rspamd_mmaped_file_create (ctx, filename, size, stcf); | |||
rspamd_mmaped_file_create (ctx, filename, size, stcf, task->task_pool); | |||
} | |||
mf = rspamd_mmaped_file_open (ctx, filename, size, stcf); | |||
@@ -1095,3 +1085,20 @@ rspamd_mmaped_file_finalize_process (struct rspamd_task *task, gpointer runtime, | |||
gpointer ctx) | |||
{ | |||
} | |||
gpointer | |||
rspamd_mmaped_file_load_tokenizer_config (gpointer runtime, | |||
gsize *len) | |||
{ | |||
rspamd_mmaped_file_t *mf = runtime; | |||
struct stat_file_header *header; | |||
g_assert (mf != NULL); | |||
header = mf->map; | |||
if (len) { | |||
*len = header->tokenizer_conf_len; | |||
} | |||
return header->unused; | |||
} |
@@ -39,6 +39,7 @@ struct rspamd_tokenizer_runtime { | |||
GTree *tokens; | |||
const gchar *name; | |||
struct rspamd_stat_tokenizer *tokenizer; | |||
struct rspamd_tokenizer_config *tkcf; | |||
gpointer config; | |||
gsize conf_len; | |||
}; |
@@ -162,12 +162,13 @@ rspamd_stat_get_tokenizer_runtime (struct rspamd_tokenizer_config *cf, | |||
return NULL; | |||
} | |||
if (!tok->tokenizer->load_config (tok, conf, conf_len)) { | |||
if (!tok->tokenizer->load_config (task->task_pool, tok, conf, conf_len)) { | |||
return NULL; | |||
} | |||
tok->config = conf; | |||
tok->conf_len = conf_len; | |||
tok->tkcf = cf; | |||
tok->tokens = g_tree_new (token_node_compare_func); | |||
rspamd_mempool_add_destructor (task->task_pool, | |||
(rspamd_mempool_destruct_t)g_tree_destroy, tok->tokens); |
@@ -154,20 +154,29 @@ rspamd_tokenizer_osb_config_from_ucl (rspamd_mempool_t * pool, | |||
} | |||
gpointer | |||
rspamd_tokenizer_osb_get_config (struct rspamd_tokenizer_config *cf, | |||
rspamd_tokenizer_osb_get_config (rspamd_mempool_t *pool, | |||
struct rspamd_tokenizer_config *cf, | |||
gsize *len) | |||
{ | |||
struct rspamd_osb_tokenizer_config *osb_cf, *def; | |||
if (cf != NULL && cf->opts != NULL) { | |||
osb_cf = rspamd_tokenizer_osb_config_from_ucl (NULL, cf->opts); | |||
osb_cf = rspamd_tokenizer_osb_config_from_ucl (pool, cf->opts); | |||
} | |||
else { | |||
def = rspamd_tokenizer_osb_default_config (); | |||
osb_cf = g_slice_alloc (sizeof (*osb_cf)); | |||
osb_cf = rspamd_mempool_alloc (pool, sizeof (*osb_cf)); | |||
memcpy (osb_cf, def, sizeof (*osb_cf)); | |||
/* Do not write sipkey to statfile */ | |||
} | |||
if (osb_cf->ht == RSPAMD_OSB_HASH_SIPHASH) { | |||
msg_info ("siphash key is not stored into statfiles, so you'd need to " | |||
"keep it inside the configuration"); | |||
} | |||
memset (osb_cf->sk, 0, sizeof (osb_cf->sk)); | |||
if (len != NULL) { | |||
*len = sizeof (*osb_cf); | |||
} | |||
@@ -176,13 +185,14 @@ rspamd_tokenizer_osb_get_config (struct rspamd_tokenizer_config *cf, | |||
} | |||
gboolean | |||
rspamd_tokenizer_osb_compatible_config (struct rspamd_tokenizer_config *cf, | |||
rspamd_tokenizer_osb_compatible_config (struct rspamd_tokenizer_runtime *rt, | |||
gpointer ptr, gsize len) | |||
{ | |||
struct rspamd_osb_tokenizer_config *osb_cf, *test_cf; | |||
gboolean ret = FALSE; | |||
test_cf = rspamd_tokenizer_osb_get_config (cf, NULL); | |||
test_cf = rt->config; | |||
g_assert (test_cf != NULL); | |||
if (len == sizeof (*osb_cf)) { | |||
osb_cf = ptr; | |||
@@ -193,7 +203,8 @@ rspamd_tokenizer_osb_compatible_config (struct rspamd_tokenizer_config *cf, | |||
else { | |||
if (osb_cf->version == DEFAULT_OSB_VERSION) { | |||
/* We can compare them directly now */ | |||
ret = memcmp (osb_cf, test_cf, sizeof (*osb_cf)) == 0; | |||
ret = (memcmp (osb_cf, test_cf, sizeof (*osb_cf) | |||
- sizeof (osb_cf->sk))) == 0; | |||
} | |||
} | |||
} | |||
@@ -208,10 +219,9 @@ rspamd_tokenizer_osb_compatible_config (struct rspamd_tokenizer_config *cf, | |||
} | |||
gint | |||
rspamd_tokenizer_osb (struct rspamd_tokenizer_config *cf, | |||
rspamd_tokenizer_osb (struct rspamd_tokenizer_runtime *rt, | |||
rspamd_mempool_t * pool, | |||
GArray * input, | |||
GTree * tree, | |||
gboolean is_utf, | |||
const gchar *prefix) | |||
{ | |||
@@ -221,6 +231,7 @@ rspamd_tokenizer_osb (struct rspamd_tokenizer_config *cf, | |||
guint64 *hashpipe, cur, seed; | |||
guint32 h1, h2; | |||
guint processed = 0, i, w, window_size; | |||
GTree *tree = rt->tokens; | |||
g_assert (tree != NULL); | |||
@@ -228,13 +239,7 @@ rspamd_tokenizer_osb (struct rspamd_tokenizer_config *cf, | |||
return FALSE; | |||
} | |||
if (cf != NULL && cf->opts != NULL) { | |||
osb_cf = rspamd_tokenizer_osb_config_from_ucl (pool, cf->opts); | |||
} | |||
else { | |||
osb_cf = rspamd_tokenizer_osb_default_config (); | |||
} | |||
osb_cf = rt->config; | |||
window_size = osb_cf->window_size; | |||
if (prefix) { | |||
@@ -334,6 +339,32 @@ rspamd_tokenizer_osb (struct rspamd_tokenizer_config *cf, | |||
return TRUE; | |||
} | |||
/* | |||
* vi:ts=4 | |||
*/ | |||
gboolean | |||
rspamd_tokenizer_osb_load_config (rspamd_mempool_t *pool, | |||
struct rspamd_tokenizer_runtime *rt, | |||
gpointer ptr, gsize len) | |||
{ | |||
struct rspamd_osb_tokenizer_config *osb_cf; | |||
if (ptr == NULL) { | |||
osb_cf = rspamd_tokenizer_osb_config_from_ucl (pool, rt->tkcf->opts); | |||
} | |||
else { | |||
g_assert (len == sizeof (*osb_cf)); | |||
osb_cf = ptr; | |||
} | |||
rt->config = osb_cf; | |||
rt->conf_len = sizeof (*osb_cf); | |||
return TRUE; | |||
} | |||
gboolean | |||
rspamd_tokenizer_osb_is_compat (struct rspamd_tokenizer_runtime *rt) | |||
{ | |||
struct rspamd_osb_tokenizer_config *osb_cf = rt->config; | |||
return (osb_cf->ht == RSPAMD_OSB_HASH_COMPAT); | |||
} |
@@ -14,10 +14,12 @@ struct rspamd_tokenizer_runtime; | |||
/* Common tokenizer structure */ | |||
struct rspamd_stat_tokenizer { | |||
gchar *name; | |||
gpointer (*get_config) (struct rspamd_tokenizer_config *cf, gsize *len); | |||
gpointer (*get_config) (rspamd_mempool_t *pool, | |||
struct rspamd_tokenizer_config *cf, gsize *len); | |||
gboolean (*compatible_config) (struct rspamd_tokenizer_runtime *rt, | |||
gpointer ptr, gsize len); | |||
gboolean (*load_config) (struct rspamd_tokenizer_runtime *rt, | |||
gboolean (*load_config) (rspamd_mempool_t *pool, | |||
struct rspamd_tokenizer_runtime *rt, | |||
gpointer ptr, gsize len); | |||
gboolean (*is_compat) (struct rspamd_tokenizer_runtime *rt); | |||
gint (*tokenize_func)(struct rspamd_tokenizer_runtime *rt, | |||
@@ -43,7 +45,8 @@ gint rspamd_tokenizer_osb (struct rspamd_tokenizer_runtime *rt, | |||
gboolean is_utf, | |||
const gchar *prefix); | |||
gpointer rspamd_tokenizer_osb_get_config (struct rspamd_tokenizer_config *cf, | |||
gpointer rspamd_tokenizer_osb_get_config (rspamd_mempool_t *pool, | |||
struct rspamd_tokenizer_config *cf, | |||
gsize *len); | |||
gboolean | |||
@@ -51,7 +54,8 @@ rspamd_tokenizer_osb_compatible_config (struct rspamd_tokenizer_runtime *rt, | |||
gpointer ptr, gsize len); | |||
gboolean | |||
rspamd_tokenizer_osb_load_config (struct rspamd_tokenizer_runtime *rt, | |||
rspamd_tokenizer_osb_load_config (rspamd_mempool_t *pool, | |||
struct rspamd_tokenizer_runtime *rt, | |||
gpointer ptr, gsize len); | |||
gboolean |