}
rspamd_url_text_extract (task->task_pool, task, text_part, TRUE);
- rspamd_fuzzy_from_text_part (text_part, task->task_pool, task->cfg->max_diff);
rspamd_mempool_add_destructor (task->task_pool,
(rspamd_mempool_destruct_t) free_byte_array_callback,
text_part->content);
text_part);
text_part->orig = part_content;
rspamd_url_text_extract (task->task_pool, task, text_part, FALSE);
- rspamd_fuzzy_from_text_part (text_part, task->task_pool, task->cfg->max_diff);
g_ptr_array_add (task->text_parts, text_part);
}
else {
struct expression_argument *arg;
GMimeObject *parent;
const GMimeContentType *ct;
+ guint tw, dw;
gint *pdiff;
if (args == NULL || args->len == 0) {
NULL);
return FALSE;
}
- if (!IS_PART_EMPTY (p1) && !IS_PART_EMPTY (p2)) {
- if (p1->diff_str != NULL && p2->diff_str != NULL) {
- diff = rspamd_diff_distance_normalized (p1->diff_str,
- p2->diff_str);
- }
- else {
- diff = rspamd_fuzzy_compare_parts (p1, p2);
- }
- debug_task (
+ if (!IS_PART_EMPTY (p1) && !IS_PART_EMPTY (p2) &&
+ p1->normalized_words && p2->normalized_words) {
+
+ tw = 0;
+ dw = 0;
+ diff = 100;
+ /* XXX: Need levenshtein distance for a set of words */
+
+ msg_debug (
+ "different words: %d, total words: %d, "
"got likeliness between parts of %d%%, threshold is %d%%",
+ dw, tw,
diff,
threshold);
+
*pdiff = diff;
rspamd_mempool_set_variable (task->task_pool,
"parts_distance",
str->len = r;
}
+
+/*
+ * Check whether two fixed strings hold exactly the same bytes.
+ *
+ * Both arguments must be non-NULL; this is enforced with g_assert.
+ * Strings of different length are never equal. Two zero-length strings
+ * are always equal, whatever their `begin` pointers are.
+ *
+ * @param s1 first string
+ * @param s2 second string
+ * @return TRUE if the lengths match and the contents are byte-identical,
+ * FALSE otherwise
+ */
+gboolean
+rspamd_fstring_equal (const rspamd_fstring_t *s1,
+		const rspamd_fstring_t *s2)
+{
+	g_assert (s1 != NULL && s2 != NULL);
+
+	if (s1->len != s2->len) {
+		return FALSE;
+	}
+
+	/*
+	 * memcmp needs valid pointers even when the length is zero, and
+	 * nothing here guarantees that an empty string has a non-NULL
+	 * `begin`, so settle the empty case without touching the buffers.
+	 */
+	if (s1->len == 0) {
+		return TRUE;
+	}
+
+	return (memcmp (s1->begin, s2->begin, s1->len) == 0);
+}
*/
void rspamd_fstrstrip (rspamd_fstring_t *str);
+/*
+ * Compare two fixed strings for equality
+ * @param s1 first string (must not be NULL)
+ * @param s2 second string (must not be NULL)
+ * @return TRUE if both strings have the same length and identical bytes,
+ * FALSE otherwise
+ */
+gboolean rspamd_fstring_equal (const rspamd_fstring_t *s1,
+ const rspamd_fstring_t *s2);
+
#endif
task->message_id, fuzzy_module_ctx->min_bytes);
continue;
}
- /* Check length of hash */
- hashlen = strlen (part->fuzzy->hash_pipe);
- if (hashlen == 0) {
+ if (part->words == NULL || part->words->len == 0) {
msg_info ("<%s>, part hash empty, skip fuzzy check",
task->message_id, fuzzy_module_ctx->min_hash_len);
continue;
}
if (fuzzy_module_ctx->min_hash_len != 0 &&
- hashlen * part->fuzzy->block_size <
- fuzzy_module_ctx->min_hash_len) {
+ part->words->len < fuzzy_module_ctx->min_hash_len) {
msg_info (
"<%s>, part hash is shorter than %d symbols, skip fuzzy check",
task->message_id,
continue;
}
- /*
- * Try legacy first
- */
- cmd = fuzzy_cmd_from_text_part (rule, c, flag, value, task->task_pool,
- part, TRUE, NULL);
- if (cmd) {
- g_ptr_array_add (res, cmd);
- }
cmd = fuzzy_cmd_from_text_part (rule, c, flag, value, task->task_pool,
part, FALSE, NULL);
if (cmd) {