}
blake2b_init (&bs, BLAKE2B_OUTBYTES);
+ memset (h, 0, sizeof (h));
cur_key = key;
out_key = (guchar *)&keys[0];
blake2b_init (&bs, BLAKE2B_OUTBYTES);
cur_key = out_key;
out_key += 16;
- memset (&h[i], 0, sizeof (h[0]));
sip24_init (&h[i], &keys[i]);
}
/* Now parse input words into a vector of hashes using rolling window */
- for (i = 0; i < (gint)input->len; i ++) {
- if (i - beg >= SHINGLES_WINDOW || i == (gint)input->len - 1) {
- for (j = beg; j <= i; j ++) {
+ for (i = 0; i <= (gint)input->len; i ++) {
+ if (i - beg >= SHINGLES_WINDOW || i == (gint)input->len) {
+ for (j = beg; j < i; j ++) {
rspamd_shingles_update_row (&g_array_index (input,
rspamd_fstring_t, j), h);
}
/* Now we need to filter all hashes and make a shingles result */
for (i = 0; i < RSPAMD_SHINGLE_SIZE; i ++) {
res->hashes[i] = filter ((guint64 *)hashes[i]->data, hashes[i]->len,
- filterd);
+ i, key, filterd);
g_array_free (hashes[i], TRUE);
}
guint64
rspamd_shingles_default_filter (guint64 *input, gsize count,
- gpointer ud)
+ gint shno, const guchar *key, gpointer ud)
{
guint64 minimal = G_MAXUINT64;
gsize i;
#include "config.h"
#include "mem_pool.h"
-#define RSPAMD_SHINGLE_SIZE 23
+#define RSPAMD_SHINGLE_SIZE 32
struct rspamd_shingle {
guint64 hashes[RSPAMD_SHINGLE_SIZE];
* @return shingle value
*/
typedef guint64 (*rspamd_shingles_filter) (guint64 *input, gsize count,
- gpointer ud);
+ gint shno, const guchar *key, gpointer ud);
/**
* Generate shingles from the input of fixed size strings using lemmatizer
* Default filtering function
*/
guint64 rspamd_shingles_default_filter (guint64 *input, gsize count,
- gpointer ud);
+ gint shno, const guchar *key, gpointer ud);
#endif /* SHINGLES_H_ */