You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

bloom.c 4.1KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158
  1. /*-
  2. * Copyright 2016 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "config.h"
  17. #include "bloom.h"
  18. #include "cryptobox.h"
  19. /* 4 bits are used for counting (implementing delete operation) */
  20. #define SIZE_BIT 4
  21. /* These macroes are for 4 bits for counting element */
  22. #define INCBIT(a, n, acc) do { \
  23. acc = \
  24. a[n * SIZE_BIT / CHAR_BIT] & (0xF << \
  25. (n % (CHAR_BIT / SIZE_BIT) * SIZE_BIT)); \
  26. acc ++; \
  27. acc &= 0xF; \
  28. \
  29. a[n * SIZE_BIT / \
  30. CHAR_BIT] &= (0xF << (4 - (n % (CHAR_BIT / SIZE_BIT) * SIZE_BIT))); \
  31. a[n * SIZE_BIT / \
  32. CHAR_BIT] |= (acc << (n % (CHAR_BIT / SIZE_BIT) * SIZE_BIT)); \
  33. } while (0);
  34. #define DECBIT(a, n, acc) do { \
  35. acc = \
  36. a[n * SIZE_BIT / CHAR_BIT] & (0xF << \
  37. (n % (CHAR_BIT / SIZE_BIT) * SIZE_BIT)); \
  38. acc --; \
  39. acc &= 0xF; \
  40. \
  41. a[n * SIZE_BIT / \
  42. CHAR_BIT] &= (0xF << (4 - (n % (CHAR_BIT / SIZE_BIT) * SIZE_BIT))); \
  43. a[n * SIZE_BIT / \
  44. CHAR_BIT] |= (acc << (n % (CHAR_BIT / SIZE_BIT) * SIZE_BIT)); \
  45. } while (0);
  46. #define GETBIT(a, \
  47. n) (a[n * SIZE_BIT / CHAR_BIT] & (0xF << \
  48. (n % (CHAR_BIT / SIZE_BIT) * SIZE_BIT)))
  49. /* Common hash functions */
  50. rspamd_bloom_filter_t *
  51. rspamd_bloom_create (size_t size, size_t nfuncs, ...)
  52. {
  53. rspamd_bloom_filter_t *bloom;
  54. va_list l;
  55. gsize n;
  56. if (!(bloom = g_malloc (sizeof (rspamd_bloom_filter_t)))) {
  57. return NULL;
  58. }
  59. if (!(bloom->a =
  60. g_new0 (gchar, (size + CHAR_BIT - 1) / CHAR_BIT * SIZE_BIT))) {
  61. g_free (bloom);
  62. return NULL;
  63. }
  64. if (!(bloom->seeds = g_new0 (guint32, nfuncs))) {
  65. g_free (bloom->a);
  66. g_free (bloom);
  67. return NULL;
  68. }
  69. va_start (l, nfuncs);
  70. for (n = 0; n < nfuncs; ++n) {
  71. bloom->seeds[n] = va_arg (l, guint32);
  72. }
  73. va_end (l);
  74. bloom->nfuncs = nfuncs;
  75. bloom->asize = size;
  76. return bloom;
  77. }
  78. void
  79. rspamd_bloom_destroy (rspamd_bloom_filter_t * bloom)
  80. {
  81. g_free (bloom->a);
  82. g_free (bloom->seeds);
  83. g_free (bloom);
  84. }
  85. gboolean
  86. rspamd_bloom_add (rspamd_bloom_filter_t * bloom, const gchar *s)
  87. {
  88. size_t n, len;
  89. u_char t;
  90. guint v;
  91. if (s == NULL) {
  92. return FALSE;
  93. }
  94. len = strlen (s);
  95. for (n = 0; n < bloom->nfuncs; ++n) {
  96. v = rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_XXHASH64,
  97. s, len, bloom->seeds[n]) % bloom->asize;
  98. INCBIT (bloom->a, v, t);
  99. }
  100. return TRUE;
  101. }
  102. gboolean
  103. rspamd_bloom_del (rspamd_bloom_filter_t * bloom, const gchar *s)
  104. {
  105. size_t n, len;
  106. u_char t;
  107. guint v;
  108. if (s == NULL) {
  109. return FALSE;
  110. }
  111. len = strlen (s);
  112. for (n = 0; n < bloom->nfuncs; ++n) {
  113. v = rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_XXHASH64,
  114. s, len, bloom->seeds[n]) % bloom->asize;
  115. DECBIT (bloom->a, v, t);
  116. }
  117. return TRUE;
  118. }
  119. gboolean
  120. rspamd_bloom_check (rspamd_bloom_filter_t * bloom, const gchar *s)
  121. {
  122. size_t n, len;
  123. guint v;
  124. if (s == NULL) {
  125. return FALSE;
  126. }
  127. len = strlen (s);
  128. for (n = 0; n < bloom->nfuncs; ++n) {
  129. v = rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_XXHASH64,
  130. s, len, bloom->seeds[n]) % bloom->asize;
  131. if (!(GETBIT (bloom->a, v))) {
  132. return FALSE;
  133. }
  134. }
  135. return TRUE;
  136. }