aboutsummaryrefslogtreecommitdiffstats
path: root/tests
Commit message (Collapse)AuthorAgeFilesLines
* 290741: encoding option on ICompilerConfigurationaclement2011-04-042-0/+78
|
* 290741: encoding option on ICompilerConfigurationaclement2011-04-043-55/+88
|
* removed import of MXBean - fails on build machineaclement2011-04-011-1/+1
|
* 338175: coping better with bad codeaclement2011-03-305-0/+25
|
* 1.6.12 infra + 292239aclement2011-03-236-0/+180
|
* PR336880 PR336774 PR336745aclement2011-02-109-0/+106
|
* 336471: makeEJSPaclement2011-02-083-0/+40
|
* 336158 - testcodeaclement2011-02-038-0/+21
|
* 336147 - testsaclement2011-02-039-0/+82
|
* 336136aclement2011-02-034-0/+63
|
* 335682aclement2011-02-028-3/+28
|
* 335682aclement2011-02-025-1/+68
|
* 333123aclement2011-02-021-0/+20
|
* 335783aclement2011-01-315-12/+98
|
* 335619aclement2011-01-316-0/+105
|
* 333274: testcodeaclement2011-01-2410-0/+160
|
* 333469aclement2011-01-084-0/+28
|
* 332388: test and fixaclement2010-12-141-2/+2
|
* 332388: test and fixaclement2010-12-135-0/+29
|
* 332388: test and fixaclement2010-12-136-0/+33
|
* kick buildaclement2010-12-071-1/+1
|
* genericsaclement2010-12-071-63/+60
|
* fixing linux failuresaclement2010-12-071-19/+11
|
* fixing linux testsaclement2010-12-071-7/+12
|
* fixing tests on linuxaclement2010-12-071-1/+1
|
* fixing tests on linuxaclement2010-12-071-0/+1
|
* fixing tests on linuxaclement2010-12-071-0/+1
|
* itd inners - fixed: needed a reference or wouldn't create the attribute.aclement2010-12-064-0/+35
|
* it itdsaclement2010-12-064-0/+32
|
* bit more testingaclement2010-11-273-8/+42
|
* annotation removal - extra checksaclement2010-11-274-0/+70
|
* innertype intertypeaclement2010-11-252-17/+17
|
* annotation removal testsaclement2010-11-252-0/+39
|
* intertype inner typesaclement2010-11-251-9/+9
|
* intertype inner typesaclement2010-11-245-9/+54
|
* 329925: declare @field remove annotationaclement2010-11-241-0/+32
|
* formataclement2010-11-241-110/+113
|
* 329925: declare @field remove annotationaclement2010-11-243-0/+190
|
* 329925: declare @field remove annotationaclement2010-11-2412-0/+174
|
* remove those tests for now, suite not committedaclement2010-11-131-1/+1
|
* 329111aclement2010-10-311-1/+9
|
* 329111aclement2010-10-312-2/+20
|
* 329111aclement2010-10-293-86/+143
|
* 328840aclement2010-10-271-0/+33
|
* 328840aclement2010-10-274-0/+82
|
* 328649: addDependencies for Compilation Participantsaclement2010-10-265-323/+412
|
* 324932V1_6_10RC1aclement2010-09-157-3/+210
|
* 323438aclement2010-08-252-0/+25
|
* 323438aclement2010-08-235-0/+283
|
* test fixed for non-windows - uses correct lower case dir nameaclement2010-08-181-5/+4
|
s="w"> const ucl_object_t *elt; struct rspamd_osb_tokenizer_config *cf, *def; guchar *key = NULL; gsize keylen; if (pool != NULL) { cf = rspamd_mempool_alloc0 (pool, sizeof (*cf)); } else { cf = g_malloc0 (sizeof (*cf)); } /* Use default config */ def = rspamd_tokenizer_osb_default_config (); memcpy (cf, def, sizeof (*cf)); elt = ucl_object_lookup (obj, "hash"); if (elt != NULL && ucl_object_type (elt) == UCL_STRING) { if (g_ascii_strncasecmp (ucl_object_tostring (elt), "xxh", 3) == 0) { cf->ht = RSPAMD_OSB_HASH_XXHASH; elt = ucl_object_lookup (obj, "seed"); if (elt != NULL && ucl_object_type (elt) == UCL_INT) { cf->seed = ucl_object_toint (elt); } } else if (g_ascii_strncasecmp (ucl_object_tostring (elt), "sip", 3) == 0) { cf->ht = RSPAMD_OSB_HASH_SIPHASH; elt = ucl_object_lookup (obj, "key"); if (elt != NULL && ucl_object_type (elt) == UCL_STRING) { key = rspamd_decode_base32 (ucl_object_tostring (elt), 0, &keylen); if (keylen < sizeof (rspamd_sipkey_t)) { msg_warn ("siphash key is too short: %z", keylen); g_free (key); } else { memcpy (cf->sk, key, sizeof (cf->sk)); g_free (key); } } else { msg_warn_pool ("siphash cannot be used without key"); } } } else { elt = ucl_object_lookup (obj, "compat"); if (elt != NULL && ucl_object_toboolean (elt)) { cf->ht = RSPAMD_OSB_HASH_COMPAT; } } elt = ucl_object_lookup (obj, "window"); if (elt != NULL && ucl_object_type (elt) == UCL_INT) { cf->window_size = ucl_object_toint (elt); if (cf->window_size > DEFAULT_FEATURE_WINDOW_SIZE * 4) { msg_err_pool ("too large window size: %d", cf->window_size); cf->window_size = DEFAULT_FEATURE_WINDOW_SIZE; } } return cf; } gpointer rspamd_tokenizer_osb_get_config (rspamd_mempool_t *pool, struct rspamd_tokenizer_config *cf, gsize *len) { struct rspamd_osb_tokenizer_config *osb_cf, *def; if (cf != NULL && cf->opts != NULL) { osb_cf = rspamd_tokenizer_osb_config_from_ucl (pool, cf->opts); } else { def = rspamd_tokenizer_osb_default_config (); osb_cf = rspamd_mempool_alloc (pool, sizeof (*osb_cf)); memcpy (osb_cf, def, sizeof (*osb_cf)); /* Do not write sipkey to statfile */ } if (osb_cf->ht == RSPAMD_OSB_HASH_SIPHASH) { msg_info_pool ("siphash key is not stored into statfiles, so you'd " "need to keep it inside the configuration"); } memset (osb_cf->sk, 0, sizeof (osb_cf->sk)); if (len != NULL) { *len = sizeof (*osb_cf); } return osb_cf; } #if 0 gboolean rspamd_tokenizer_osb_compatible_config (struct rspamd_tokenizer_runtime *rt, gpointer ptr, gsize len) { struct rspamd_osb_tokenizer_config *osb_cf, *test_cf; gboolean ret = FALSE; test_cf = rt->config; g_assert (test_cf != NULL); if (len == sizeof (*osb_cf)) { osb_cf = ptr; if (memcmp (osb_cf, osb_tokenizer_magic, sizeof (osb_tokenizer_magic)) != 0) { ret = test_cf->ht == RSPAMD_OSB_HASH_COMPAT; } else { if (osb_cf->version == DEFAULT_OSB_VERSION) { /* We can compare them directly now */ ret = (memcmp (osb_cf, test_cf, sizeof (*osb_cf) - sizeof (osb_cf->sk))) == 0; } } } else { /* We are compatible now merely with fallback config */ if (test_cf->ht == RSPAMD_OSB_HASH_COMPAT) { ret = TRUE; } } return ret; } gboolean rspamd_tokenizer_osb_load_config (rspamd_mempool_t *pool, struct rspamd_tokenizer_runtime *rt, gpointer ptr, gsize len) { struct rspamd_osb_tokenizer_config *osb_cf; if (ptr == NULL || len == 0) { osb_cf = rspamd_tokenizer_osb_config_from_ucl (pool, rt->tkcf->opts); if (osb_cf->ht != RSPAMD_OSB_HASH_COMPAT) { /* Trying to load incompatible configuration */ msg_err_pool ("cannot load tokenizer configuration from a legacy " "statfile; maybe you have forgotten to set 'compat' option" " in the tokenizer configuration"); return FALSE; } } else { g_assert (len == sizeof (*osb_cf)); osb_cf = ptr; } rt->config = osb_cf; rt->conf_len = sizeof (*osb_cf); return TRUE; } gboolean rspamd_tokenizer_osb_is_compat (struct rspamd_tokenizer_runtime *rt) { struct rspamd_osb_tokenizer_config *osb_cf = rt->config; return (osb_cf->ht == RSPAMD_OSB_HASH_COMPAT); } #endif struct token_pipe_entry { guint64 h; rspamd_stat_token_t *t; }; gint rspamd_tokenizer_osb (struct rspamd_stat_ctx *ctx, struct rspamd_task *task, GArray *words, gboolean is_utf, const gchar *prefix, GPtrArray *result) { rspamd_token_t *new_tok = NULL; rspamd_stat_token_t *token; struct rspamd_osb_tokenizer_config *osb_cf; guint64 cur, seed; struct token_pipe_entry *hashpipe; guint32 h1, h2; gsize token_size; guint processed = 0, i, w, window_size, token_flags = 0; if (words == NULL) { return FALSE; } osb_cf = ctx->tkcf; window_size = osb_cf->window_size; if (prefix) { seed = rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_XXHASH64, prefix, strlen (prefix), osb_cf->seed); } else { seed = osb_cf->seed; } hashpipe = g_alloca (window_size * sizeof (hashpipe[0])); for (i = 0; i < window_size; i++) { hashpipe[i].h = 0xfe; hashpipe[i].t = NULL; } token_size = sizeof (rspamd_token_t) + sizeof (gdouble) * ctx->statfiles->len; g_assert (token_size > 0); for (w = 0; w < words->len; w ++) { token = &g_array_index (words, rspamd_stat_token_t, w); token_flags = token->flags; const gchar *begin; gsize len; if (token->flags & (RSPAMD_STAT_TOKEN_FLAG_STOP_WORD|RSPAMD_STAT_TOKEN_FLAG_SKIPPED)) { /* Skip stop/skipped words */ continue; } if (token->flags & RSPAMD_STAT_TOKEN_FLAG_TEXT) { begin = token->stemmed.begin; len = token->stemmed.len; } else { begin = token->original.begin; len = token->original.len; } if (osb_cf->ht == RSPAMD_OSB_HASH_COMPAT) { rspamd_ftok_t ftok; ftok.begin = begin; ftok.len = len; cur = rspamd_fstrhash_lc (&ftok, is_utf); } else { /* We know that the words are normalized */ if (osb_cf->ht == RSPAMD_OSB_HASH_XXHASH) { cur = rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_XXHASH64, begin, len, osb_cf->seed); } else { rspamd_cryptobox_siphash ((guchar *)&cur, begin, len, osb_cf->sk); if (prefix) { cur ^= seed; } } } if (token_flags & RSPAMD_STAT_TOKEN_FLAG_UNIGRAM) { new_tok = rspamd_mempool_alloc0 (task->task_pool, token_size); new_tok->flags = token_flags; new_tok->t1 = token; new_tok->t2 = token; new_tok->data = cur; new_tok->window_idx = 0; g_ptr_array_add (result, new_tok); continue; } #define ADD_TOKEN do {\ new_tok = rspamd_mempool_alloc0 (task->task_pool, token_size); \ new_tok->flags = token_flags; \ new_tok->t1 = hashpipe[0].t; \ new_tok->t2 = hashpipe[i].t; \ if (osb_cf->ht == RSPAMD_OSB_HASH_COMPAT) { \ h1 = ((guint32)hashpipe[0].h) * primes[0] + \ ((guint32)hashpipe[i].h) * primes[i << 1]; \ h2 = ((guint32)hashpipe[0].h) * primes[1] + \ ((guint32)hashpipe[i].h) * primes[(i << 1) - 1]; \ memcpy((guchar *)&new_tok->data, &h1, sizeof (h1)); \ memcpy(((guchar *)&new_tok->data) + sizeof (h1), &h2, sizeof (h2)); \ } \ else { \ new_tok->data = hashpipe[0].h * primes[0] + hashpipe[i].h * primes[i << 1]; \ } \ new_tok->window_idx = i; \ g_ptr_array_add (result, new_tok); \ } while(0) if (processed < window_size) { /* Just fill a hashpipe */ ++processed; hashpipe[window_size - processed].h = cur; hashpipe[window_size - processed].t = token; } else { /* Shift hashpipe */ for (i = window_size - 1; i > 0; i--) { hashpipe[i] = hashpipe[i - 1]; } hashpipe[0].h = cur; hashpipe[0].t = token; processed++; for (i = 1; i < window_size; i++) { if (!(hashpipe[i].t->flags & RSPAMD_STAT_TOKEN_FLAG_EXCEPTION)) { ADD_TOKEN; } } } } if (processed > 1 && processed <= window_size) { processed --; memmove (hashpipe, &hashpipe[window_size - processed], processed * sizeof (hashpipe[0])); for (i = 1; i < processed; i++) { ADD_TOKEN; } } #undef ADD_TOKEN return TRUE; }