浏览代码

Fix tokenizers and mmapped file.

tags/1.0.0
Vsevolod Stakhov 9 年前
父节点
当前提交
d4fbe7db61

+ 1
- 1
src/libstat/backends/backends.h 查看文件

@@ -99,7 +99,7 @@ struct rspamd_stat_backend {
gpointer ctx); \
ucl_object_t * rspamd_##name##_get_stat (gpointer runtime, \
gpointer ctx); \
void rspamd_##name##_load_tokenizer_config (gpointer runtime, \
gpointer rspamd_##name##_load_tokenizer_config (gpointer runtime, \
gsize *len); \
void rspamd_##name##_close (gpointer ctx)


+ 35
- 28
src/libstat/backends/mmaped_file.c 查看文件

@@ -120,7 +120,8 @@ rspamd_mmaped_file_t * rspamd_mmaped_file_is_open (
rspamd_mmaped_file_t * rspamd_mmaped_file_open (rspamd_mmaped_file_ctx * pool,
const gchar *filename, size_t size, struct rspamd_statfile_config *stcf);
gint rspamd_mmaped_file_create (rspamd_mmaped_file_ctx * pool,
const gchar *filename, size_t size, struct rspamd_statfile_config *stcf);
const gchar *filename, size_t size, struct rspamd_statfile_config *stcf,
rspamd_mempool_t *mempool);

double
rspamd_mmaped_file_get_block (rspamd_mmaped_file_ctx * pool,
@@ -452,7 +453,7 @@ rspamd_mmaped_file_reindex (rspamd_mmaped_file_ctx * pool,
}

/* Now create new file with required size */
if (rspamd_mmaped_file_create (pool, filename, size, stcf) != 0) {
if (rspamd_mmaped_file_create (pool, filename, size, stcf, pool->pool) != 0) {
msg_err ("cannot create new file");
g_free (backup);
return NULL;
@@ -543,8 +544,6 @@ rspamd_mmaped_file_open (rspamd_mmaped_file_ctx * pool,
{
struct stat st;
rspamd_mmaped_file_t *new_file;
struct rspamd_stat_tokenizer *tokenizer;
struct stat_file_header *header;

if ((new_file = rspamd_mmaped_file_is_open (pool, stcf)) != NULL) {
return new_file;
@@ -615,22 +614,7 @@ rspamd_mmaped_file_open (rspamd_mmaped_file_ctx * pool,

rspamd_mmaped_file_preload (new_file);

/* Check tokenizer compatibility */
header = new_file->map;
g_assert (stcf->clcf != NULL);
g_assert (stcf->clcf->tokenizer != NULL);
tokenizer = rspamd_stat_get_tokenizer (stcf->clcf->tokenizer->name);
g_assert (tokenizer != NULL);

if (!tokenizer->compatible_config (stcf->clcf->tokenizer, header->unused,
header->tokenizer_conf_len)) {
msg_err ("mmapped statfile %s is not compatible with the tokenizer "
"defined", new_file->filename);
munmap (new_file->map, st.st_size);
g_slice_free1 (sizeof (*new_file), new_file);

return NULL;
}

g_hash_table_insert (pool->files, stcf, new_file);

@@ -664,7 +648,7 @@ rspamd_mmaped_file_close_file (rspamd_mmaped_file_ctx * pool,

gint
rspamd_mmaped_file_create (rspamd_mmaped_file_ctx * pool, const gchar *filename,
size_t size, struct rspamd_statfile_config *stcf)
size_t size, struct rspamd_statfile_config *stcf, rspamd_mempool_t *mempool)
{
struct stat_file_header header = {
.magic = {'r', 's', 'd'},
@@ -722,7 +706,7 @@ rspamd_mmaped_file_create (rspamd_mmaped_file_ctx * pool, const gchar *filename,
g_assert (stcf->clcf->tokenizer != NULL);
tokenizer = rspamd_stat_get_tokenizer (stcf->clcf->tokenizer->name);
g_assert (tokenizer != NULL);
tok_conf = tokenizer->get_config (stcf->clcf->tokenizer, &tok_conf_len);
tok_conf = tokenizer->get_config (mempool, stcf->clcf->tokenizer, &tok_conf_len);
header.tokenizer_conf_len = tok_conf_len;
g_assert (tok_conf_len < sizeof (header.unused) - sizeof (guint64));
memcpy (header.unused, tok_conf, tok_conf_len);
@@ -819,20 +803,25 @@ rspamd_mmaped_file_init (struct rspamd_stat_ctx *ctx, struct rspamd_config *cfg)
clf = cur->data;

curst = clf->statfiles;
while (curst) {
stf = curst->data;

if (clf->backend == NULL) {
/*
* By default, all statfiles are treated as mmaped files
*/
if (stf->backend == NULL ||
strcmp (stf->backend, MMAPED_BACKEND_TYPE) == 0) {
clf->backend = MMAPED_BACKEND_TYPE;
}

if (strcmp (clf->backend, MMAPED_BACKEND_TYPE) == 0) {
while (curst) {
stf = curst->data;
/*
* Check configuration sanity
*/
filenameo = ucl_object_find_key (stf->opts, "filename");

if (filenameo == NULL || ucl_object_type (filenameo) != UCL_STRING) {
filenameo = ucl_object_find_key (stf->opts, "path");

if (filenameo == NULL || ucl_object_type (filenameo) != UCL_STRING) {
msg_err ("statfile %s has no filename defined", stf->symbol);
curst = curst->next;
@@ -843,6 +832,7 @@ rspamd_mmaped_file_init (struct rspamd_stat_ctx *ctx, struct rspamd_config *cfg)
filename = ucl_object_tostring (filenameo);

sizeo = ucl_object_find_key (stf->opts, "size");

if (sizeo == NULL || ucl_object_type (sizeo) != UCL_INT) {
msg_err ("statfile %s has no size defined", stf->symbol);
curst = curst->next;
@@ -854,9 +844,9 @@ rspamd_mmaped_file_init (struct rspamd_stat_ctx *ctx, struct rspamd_config *cfg)
rspamd_mmaped_file_open (new, filename, size, stf);

ctx->statfiles ++;
}

curst = curst->next;
curst = curst->next;
}
}

cur = g_list_next (cur);
@@ -927,7 +917,7 @@ rspamd_mmaped_file_runtime (struct rspamd_task *task,
size = ucl_object_toint (sizeo);

if (learn) {
rspamd_mmaped_file_create (ctx, filename, size, stcf);
rspamd_mmaped_file_create (ctx, filename, size, stcf, task->task_pool);
}

mf = rspamd_mmaped_file_open (ctx, filename, size, stcf);
@@ -1095,3 +1085,20 @@ rspamd_mmaped_file_finalize_process (struct rspamd_task *task, gpointer runtime,
gpointer ctx)
{
}

/*
 * Hand back the tokenizer configuration bytes kept in the header of a
 * mapped statfile.
 *
 * runtime - treated as an rspamd_mmaped_file_t; must not be NULL (asserted)
 * len     - optional out parameter; when non-NULL it receives the
 *           header's tokenizer_conf_len value
 *
 * Returns a pointer to the header's `unused` area. Nothing is copied, so
 * the result points into whatever `map` refers to.
 */
gpointer
rspamd_mmaped_file_load_tokenizer_config (gpointer runtime, gsize *len)
{
	rspamd_mmaped_file_t *mf = runtime;
	struct stat_file_header *hdr;

	g_assert (mf != NULL);

	/* The statfile header sits at the very start of the mapping */
	hdr = mf->map;

	if (len == NULL) {
		/* Caller is not interested in the length */
		return hdr->unused;
	}

	*len = hdr->tokenizer_conf_len;

	return hdr->unused;
}

+ 1
- 0
src/libstat/stat_internal.h 查看文件

@@ -39,6 +39,7 @@ struct rspamd_tokenizer_runtime {
GTree *tokens;
const gchar *name;
struct rspamd_stat_tokenizer *tokenizer;
struct rspamd_tokenizer_config *tkcf;
gpointer config;
gsize conf_len;
};

+ 2
- 1
src/libstat/stat_process.c 查看文件

@@ -162,12 +162,13 @@ rspamd_stat_get_tokenizer_runtime (struct rspamd_tokenizer_config *cf,
return NULL;
}

if (!tok->tokenizer->load_config (tok, conf, conf_len)) {
if (!tok->tokenizer->load_config (task->task_pool, tok, conf, conf_len)) {
return NULL;
}

tok->config = conf;
tok->conf_len = conf_len;
tok->tkcf = cf;
tok->tokens = g_tree_new (token_node_compare_func);
rspamd_mempool_add_destructor (task->task_pool,
(rspamd_mempool_destruct_t)g_tree_destroy, tok->tokens);

+ 49
- 18
src/libstat/tokenizers/osb.c 查看文件

@@ -154,20 +154,29 @@ rspamd_tokenizer_osb_config_from_ucl (rspamd_mempool_t * pool,
}

gpointer
rspamd_tokenizer_osb_get_config (struct rspamd_tokenizer_config *cf,
rspamd_tokenizer_osb_get_config (rspamd_mempool_t *pool,
struct rspamd_tokenizer_config *cf,
gsize *len)
{
struct rspamd_osb_tokenizer_config *osb_cf, *def;

if (cf != NULL && cf->opts != NULL) {
osb_cf = rspamd_tokenizer_osb_config_from_ucl (NULL, cf->opts);
osb_cf = rspamd_tokenizer_osb_config_from_ucl (pool, cf->opts);
}
else {
def = rspamd_tokenizer_osb_default_config ();
osb_cf = g_slice_alloc (sizeof (*osb_cf));
osb_cf = rspamd_mempool_alloc (pool, sizeof (*osb_cf));
memcpy (osb_cf, def, sizeof (*osb_cf));
/* Do not write sipkey to statfile */
}

if (osb_cf->ht == RSPAMD_OSB_HASH_SIPHASH) {
msg_info ("siphash key is not stored into statfiles, so you'd need to "
"keep it inside the configuration");
}

memset (osb_cf->sk, 0, sizeof (osb_cf->sk));

if (len != NULL) {
*len = sizeof (*osb_cf);
}
@@ -176,13 +185,14 @@ rspamd_tokenizer_osb_get_config (struct rspamd_tokenizer_config *cf,
}

gboolean
rspamd_tokenizer_osb_compatible_config (struct rspamd_tokenizer_config *cf,
rspamd_tokenizer_osb_compatible_config (struct rspamd_tokenizer_runtime *rt,
gpointer ptr, gsize len)
{
struct rspamd_osb_tokenizer_config *osb_cf, *test_cf;
gboolean ret = FALSE;

test_cf = rspamd_tokenizer_osb_get_config (cf, NULL);
test_cf = rt->config;
g_assert (test_cf != NULL);

if (len == sizeof (*osb_cf)) {
osb_cf = ptr;
@@ -193,7 +203,8 @@ rspamd_tokenizer_osb_compatible_config (struct rspamd_tokenizer_config *cf,
else {
if (osb_cf->version == DEFAULT_OSB_VERSION) {
/* We can compare them directly now */
ret = memcmp (osb_cf, test_cf, sizeof (*osb_cf)) == 0;
ret = (memcmp (osb_cf, test_cf, sizeof (*osb_cf)
- sizeof (osb_cf->sk))) == 0;
}
}
}
@@ -208,10 +219,9 @@ rspamd_tokenizer_osb_compatible_config (struct rspamd_tokenizer_config *cf,
}

gint
rspamd_tokenizer_osb (struct rspamd_tokenizer_config *cf,
rspamd_tokenizer_osb (struct rspamd_tokenizer_runtime *rt,
rspamd_mempool_t * pool,
GArray * input,
GTree * tree,
gboolean is_utf,
const gchar *prefix)
{
@@ -221,6 +231,7 @@ rspamd_tokenizer_osb (struct rspamd_tokenizer_config *cf,
guint64 *hashpipe, cur, seed;
guint32 h1, h2;
guint processed = 0, i, w, window_size;
GTree *tree = rt->tokens;

g_assert (tree != NULL);

@@ -228,13 +239,7 @@ rspamd_tokenizer_osb (struct rspamd_tokenizer_config *cf,
return FALSE;
}

if (cf != NULL && cf->opts != NULL) {
osb_cf = rspamd_tokenizer_osb_config_from_ucl (pool, cf->opts);
}
else {
osb_cf = rspamd_tokenizer_osb_default_config ();
}

osb_cf = rt->config;
window_size = osb_cf->window_size;

if (prefix) {
@@ -334,6 +339,32 @@ rspamd_tokenizer_osb (struct rspamd_tokenizer_config *cf,
return TRUE;
}

/*
* vi:ts=4
*/

/*
 * Attach an OSB tokenizer configuration to a tokenizer runtime.
 *
 * pool - memory pool passed to rspamd_tokenizer_osb_config_from_ucl()
 * rt   - runtime whose `config` and `conf_len` fields are filled in
 * ptr  - previously serialized configuration, or NULL to build one from
 *        rt->tkcf->opts
 * len  - size in bytes of the data at `ptr`; ignored when `ptr` is NULL
 *
 * When `ptr` is given it is used in place (not copied), so it must stay
 * valid for as long as the runtime uses it.
 *
 * Returns TRUE when a configuration has been attached, FALSE when the
 * runtime is unusable, the serialized blob has an unexpected size, or no
 * configuration could be built. `rt` is left untouched on failure.
 */
gboolean
rspamd_tokenizer_osb_load_config (rspamd_mempool_t *pool,
		struct rspamd_tokenizer_runtime *rt,
		gpointer ptr, gsize len)
{
	struct rspamd_osb_tokenizer_config *osb_cf;

	if (rt == NULL) {
		return FALSE;
	}

	if (ptr == NULL) {
		/* rt->tkcf is dereferenced here; refuse rather than crash if unset */
		if (rt->tkcf == NULL) {
			return FALSE;
		}

		osb_cf = rspamd_tokenizer_osb_config_from_ucl (pool, rt->tkcf->opts);

		if (osb_cf == NULL) {
			/* A NULL config would be dereferenced later by the tokenizer */
			return FALSE;
		}
	}
	else {
		/*
		 * The blob may originate from a statfile, so its size is checked at
		 * runtime instead of with g_assert: an assertion either aborts the
		 * whole process or, when assertions are compiled out, lets an
		 * undersized buffer be read as a full configuration structure.
		 */
		if (len != sizeof (*osb_cf)) {
			return FALSE;
		}

		osb_cf = ptr;
	}

	rt->config = osb_cf;
	rt->conf_len = sizeof (*osb_cf);

	return TRUE;
}

gboolean
rspamd_tokenizer_osb_is_compat (struct rspamd_tokenizer_runtime *rt)
{
struct rspamd_osb_tokenizer_config *osb_cf = rt->config;

return (osb_cf->ht == RSPAMD_OSB_HASH_COMPAT);
}

+ 8
- 4
src/libstat/tokenizers/tokenizers.h 查看文件

@@ -14,10 +14,12 @@ struct rspamd_tokenizer_runtime;
/* Common tokenizer structure */
struct rspamd_stat_tokenizer {
gchar *name;
gpointer (*get_config) (struct rspamd_tokenizer_config *cf, gsize *len);
gpointer (*get_config) (rspamd_mempool_t *pool,
struct rspamd_tokenizer_config *cf, gsize *len);
gboolean (*compatible_config) (struct rspamd_tokenizer_runtime *rt,
gpointer ptr, gsize len);
gboolean (*load_config) (struct rspamd_tokenizer_runtime *rt,
gboolean (*load_config) (rspamd_mempool_t *pool,
struct rspamd_tokenizer_runtime *rt,
gpointer ptr, gsize len);
gboolean (*is_compat) (struct rspamd_tokenizer_runtime *rt);
gint (*tokenize_func)(struct rspamd_tokenizer_runtime *rt,
@@ -43,7 +45,8 @@ gint rspamd_tokenizer_osb (struct rspamd_tokenizer_runtime *rt,
gboolean is_utf,
const gchar *prefix);

gpointer rspamd_tokenizer_osb_get_config (struct rspamd_tokenizer_config *cf,
gpointer rspamd_tokenizer_osb_get_config (rspamd_mempool_t *pool,
struct rspamd_tokenizer_config *cf,
gsize *len);

gboolean
@@ -51,7 +54,8 @@ rspamd_tokenizer_osb_compatible_config (struct rspamd_tokenizer_runtime *rt,
gpointer ptr, gsize len);

gboolean
rspamd_tokenizer_osb_load_config (struct rspamd_tokenizer_runtime *rt,
rspamd_tokenizer_osb_load_config (rspamd_mempool_t *pool,
struct rspamd_tokenizer_runtime *rt,
gpointer ptr, gsize len);

gboolean

正在加载...
取消
保存