You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

rspamd_radix_test.c 9.6KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376
  1. /*
  2. * Copyright 2024 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "config.h"
  17. #include "rspamd.h"
  18. #include "radix.h"
  19. #include "ottery.h"
  20. #include "btrie.h"
  21. const gsize max_elts = 500 * 1024;
  22. const int lookup_cycles = 1 * 1024;
  23. const int lookup_divisor = 10;
  24. const uint masks[] = {
  25. 8,
  26. 16,
  27. 24,
  28. 32,
  29. 27,
  30. 29,
  31. 19,
  32. 13,
  33. 22};
  34. struct _tv {
  35. const char *ip;
  36. const char *nip;
  37. const char *m;
  38. uint32_t mask;
  39. uint8_t *addr;
  40. uint8_t *naddr;
  41. gsize len;
  42. } test_vec[] = {
  43. {"192.168.1.1", "192.168.1.2", "32", 0, 0, 0, 0},
  44. {"192.168.1.0", "192.168.2.1", "24", 0, 0, 0, 0},
  45. {"192.0.0.0", "193.167.2.1", "8", 0, 0, 0, 0},
  46. {"172.0.0.0", "171.16.1.0", "8", 0, 0, 0, 0},
  47. {"172.16.0.1", "127.0.0.1", "16", 0, 0, 0, 0},
  48. {"172.17.1.0", "10.0.0.1", "27", 0, 0, 0, 0},
  49. {"172.17.1.1", "0.0.0.1", "32", 0, 0, 0, 0},
  50. /* Some bad data known to cause problem in the past */
  51. {"191.245.170.246", NULL, "19", 0, 0, 0, 0},
  52. {"227.88.150.170", NULL, "23", 0, 0, 0, 0},
  53. {"105.225.182.92", NULL, "24", 0, 0, 0, 0},
  54. {"223.167.155.240", NULL, "29", 0, 0, 0, 0},
  55. {"125.241.220.172", NULL, "2", 0, 0, 0, 0},
  56. /* Mask = 0 */
  57. {"143.105.181.13", NULL, "8", 0, 0, 0, 0},
  58. {"113.241.233.86", NULL, "26", 0, 0, 0, 0},
  59. {"185.187.122.222", NULL, "8", 0, 0, 0, 0},
  60. {"109.206.26.202", NULL, "12", 0, 0, 0, 0},
  61. {"130.244.233.150", NULL, "0", 0, 0, 0, 0},
  62. /* Close ip addresses */
  63. {"1.2.3.1", NULL, "32", 0, 0, 0, 0},
  64. {"1.2.3.2", NULL, "32", 0, 0, 0, 0},
  65. {"1.2.3.3", NULL, "32", 0, 0, 0, 0},
  66. {"1.2.3.4", NULL, "32", 0, 0, 0, 0},
  67. {NULL, NULL, NULL, 0, 0, 0, 0}};
  68. static void
  69. rspamd_radix_test_vec(void)
  70. {
  71. radix_compressed_t *tree = radix_create_compressed(NULL);
  72. struct _tv *t = &test_vec[0];
  73. struct in_addr ina;
  74. struct in6_addr in6a;
  75. gulong i, val;
  76. while (t->ip != NULL) {
  77. t->addr = g_malloc(sizeof(in6a));
  78. t->naddr = g_malloc(sizeof(in6a));
  79. if (inet_pton(AF_INET, t->ip, &ina) == 1) {
  80. memcpy(t->addr, &ina, sizeof(ina));
  81. t->len = sizeof(ina);
  82. }
  83. else if (inet_pton(AF_INET6, t->ip, &in6a) == 1) {
  84. memcpy(t->addr, &in6a, sizeof(in6a));
  85. t->len = sizeof(in6a);
  86. }
  87. else {
  88. g_assert(0);
  89. }
  90. if (t->nip) {
  91. if (inet_pton(AF_INET, t->nip, &ina) == 1) {
  92. memcpy(t->naddr, &ina, sizeof(ina));
  93. }
  94. else if (inet_pton(AF_INET6, t->nip, &in6a) == 1) {
  95. memcpy(t->naddr, &in6a, sizeof(in6a));
  96. }
  97. else {
  98. g_assert(0);
  99. }
  100. }
  101. t->mask = t->len * NBBY - strtoul(t->m, NULL, 10);
  102. t++;
  103. }
  104. t = &test_vec[0];
  105. i = 0;
  106. while (t->ip != NULL) {
  107. radix_insert_compressed(tree, t->addr, t->len, t->mask, ++i);
  108. t++;
  109. }
  110. i = 0;
  111. t = &test_vec[0];
  112. while (t->ip != NULL) {
  113. val = radix_find_compressed(tree, t->addr, t->len);
  114. g_assert(val == ++i);
  115. /* g_assert (val != RADIX_NO_VALUE); */
  116. if (t->nip != NULL) {
  117. val = radix_find_compressed(tree, t->naddr, t->len);
  118. g_assert(val != i);
  119. }
  120. t++;
  121. }
  122. radix_destroy_compressed(tree);
  123. }
  124. static void
  125. rspamd_btrie_test_vec(void)
  126. {
  127. rspamd_mempool_t *pool;
  128. struct btrie *tree;
  129. struct _tv *t = &test_vec[0];
  130. struct in_addr ina;
  131. struct in6_addr in6a;
  132. gsize i;
  133. gpointer val;
  134. pool = rspamd_mempool_new(rspamd_mempool_suggest_size(), "btrie", 0);
  135. tree = btrie_init(pool);
  136. while (t->ip != NULL) {
  137. t->addr = g_malloc(sizeof(in6a));
  138. t->naddr = g_malloc(sizeof(in6a));
  139. if (inet_pton(AF_INET, t->ip, &ina) == 1) {
  140. memcpy(t->addr, &ina, sizeof(ina));
  141. t->len = sizeof(ina);
  142. }
  143. else if (inet_pton(AF_INET6, t->ip, &in6a) == 1) {
  144. memcpy(t->addr, &in6a, sizeof(in6a));
  145. t->len = sizeof(in6a);
  146. }
  147. else {
  148. g_assert(0);
  149. }
  150. if (t->nip) {
  151. if (inet_pton(AF_INET, t->nip, &ina) == 1) {
  152. memcpy(t->naddr, &ina, sizeof(ina));
  153. }
  154. else if (inet_pton(AF_INET6, t->nip, &in6a) == 1) {
  155. memcpy(t->naddr, &in6a, sizeof(in6a));
  156. }
  157. else {
  158. g_assert(0);
  159. }
  160. }
  161. t->mask = strtoul(t->m, NULL, 10);
  162. t++;
  163. }
  164. t = &test_vec[0];
  165. i = 0;
  166. while (t->ip != NULL) {
  167. g_assert(btrie_add_prefix(tree, t->addr, t->mask,
  168. GSIZE_TO_POINTER(++i)) == BTRIE_OKAY);
  169. t++;
  170. }
  171. i = 0;
  172. t = &test_vec[0];
  173. while (t->ip != NULL) {
  174. val = btrie_lookup(tree, t->addr, t->len * NBBY);
  175. i++;
  176. g_assert(GPOINTER_TO_SIZE(val) == i);
  177. if (t->nip != NULL) {
  178. val = btrie_lookup(tree, t->naddr, t->len * NBBY);
  179. g_assert(GPOINTER_TO_SIZE(val) != i);
  180. }
  181. t++;
  182. }
  183. }
  184. void rspamd_radix_test_func(void)
  185. {
  186. struct btrie *btrie;
  187. rspamd_mempool_t *pool;
  188. struct {
  189. uint32_t addr;
  190. uint32_t mask;
  191. uint8_t addr6[16];
  192. uint32_t mask6;
  193. uint8_t addr64[16];
  194. } *addrs;
  195. gsize nelts, i, check;
  196. int lc;
  197. gboolean all_good = TRUE;
  198. double ts1, ts2;
  199. double diff;
  200. /* Test suite for the compressed trie */
  201. rspamd_btrie_test_vec();
  202. rspamd_radix_test_vec();
  203. rspamd_random_seed_fast();
  204. nelts = max_elts;
  205. /* First of all we generate many elements and push them to the array */
  206. addrs = g_malloc(nelts * sizeof(addrs[0]));
  207. for (i = 0; i < nelts; i++) {
  208. addrs[i].addr = ottery_rand_uint32();
  209. memset(addrs[i].addr64, 0, 10);
  210. memcpy(addrs[i].addr64 + 12, &addrs[i].addr, 4);
  211. addrs[i].mask = masks[ottery_rand_range(G_N_ELEMENTS(masks) - 1)];
  212. ottery_rand_bytes(addrs[i].addr6, sizeof(addrs[i].addr6));
  213. addrs[i].mask6 = ottery_rand_range(128 - 16) + 16;
  214. }
  215. pool = rspamd_mempool_new(65536, "btrie6", 0);
  216. btrie = btrie_init(pool);
  217. msg_notice("btrie performance ipv6 only (%z elts)", nelts);
  218. ts1 = rspamd_get_ticks(TRUE);
  219. for (i = 0; i < nelts; i++) {
  220. btrie_add_prefix(btrie, addrs[i].addr6,
  221. addrs[i].mask6, GSIZE_TO_POINTER(i + 1));
  222. }
  223. ts2 = rspamd_get_ticks(TRUE);
  224. diff = (ts2 - ts1);
  225. msg_notice("Added %hz elements in %.0f ticks (%.2f ticks per element)",
  226. nelts, diff, diff / (double) nelts);
  227. ts1 = rspamd_get_ticks(TRUE);
  228. for (lc = 0; lc < lookup_cycles && all_good; lc++) {
  229. for (i = 0; i < nelts / lookup_divisor; i++) {
  230. check = rspamd_random_uint64_fast() % nelts;
  231. if (btrie_lookup(btrie, addrs[check].addr6, sizeof(addrs[check].addr6) * 8) == NULL) {
  232. char ipbuf[INET6_ADDRSTRLEN + 1];
  233. all_good = FALSE;
  234. inet_ntop(AF_INET6, addrs[check].addr6, ipbuf, sizeof(ipbuf));
  235. msg_notice("BAD btrie: {\"%s\", NULL, \"%ud\", 0, 0, 0, 0},",
  236. ipbuf,
  237. addrs[check].mask6);
  238. all_good = FALSE;
  239. }
  240. }
  241. }
  242. g_assert(all_good);
  243. ts2 = rspamd_get_ticks(TRUE);
  244. diff = (ts2 - ts1);
  245. msg_notice("Checked %hz elements in %.0f ticks (%.2f ticks per lookup)",
  246. nelts * lookup_cycles / lookup_divisor, diff,
  247. diff / ((double) nelts * lookup_cycles / lookup_divisor));
  248. rspamd_mempool_delete(pool);
  249. /*
  250. * IPv4 part
  251. */
  252. pool = rspamd_mempool_new(65536, "btrie4", 0);
  253. btrie = btrie_init(pool);
  254. msg_notice("btrie performance ipv4 only (%z elts)", nelts);
  255. ts1 = rspamd_get_ticks(TRUE);
  256. for (i = 0; i < nelts; i++) {
  257. btrie_add_prefix(btrie, (unsigned char *) &addrs[i].addr,
  258. addrs[i].mask, GSIZE_TO_POINTER(i + 1));
  259. }
  260. ts2 = rspamd_get_ticks(TRUE);
  261. diff = (ts2 - ts1);
  262. msg_notice("Added %hz elements in %.0f ticks (%.2f ticks per element)",
  263. nelts, diff, diff / (double) nelts);
  264. ts1 = rspamd_get_ticks(TRUE);
  265. for (lc = 0; lc < lookup_cycles && all_good; lc++) {
  266. for (i = 0; i < nelts / lookup_divisor; i++) {
  267. check = rspamd_random_uint64_fast() % nelts;
  268. if (btrie_lookup(btrie, (unsigned char *) &addrs[check].addr, sizeof(addrs[check].addr) * 8) == NULL) {
  269. char ipbuf[INET6_ADDRSTRLEN + 1];
  270. all_good = FALSE;
  271. inet_ntop(AF_INET, (unsigned char *) &addrs[check].addr, ipbuf, sizeof(ipbuf));
  272. msg_notice("BAD btrie: {\"%s\", NULL, \"%ud\", 0, 0, 0, 0},",
  273. ipbuf,
  274. addrs[check].mask);
  275. all_good = FALSE;
  276. }
  277. }
  278. }
  279. g_assert(all_good);
  280. ts2 = rspamd_get_ticks(TRUE);
  281. diff = (ts2 - ts1);
  282. msg_notice("Checked %hz elements in %.0f ticks (%.2f ticks per lookup)",
  283. nelts * lookup_cycles / lookup_divisor, diff,
  284. diff / ((double) nelts * lookup_cycles / lookup_divisor));
  285. rspamd_mempool_delete(pool);
  286. /*
  287. * IPv4 -> IPv6 mapped
  288. */
  289. pool = rspamd_mempool_new(65536, "btrie4map", 0);
  290. btrie = btrie_init(pool);
  291. msg_notice("btrie performance ipv4 + ipv6map (%z elts)", nelts);
  292. ts1 = rspamd_get_ticks(TRUE);
  293. for (i = 0; i < nelts; i++) {
  294. btrie_add_prefix(btrie, addrs[i].addr64,
  295. addrs[i].mask + 96, GSIZE_TO_POINTER(i + 1));
  296. }
  297. ts2 = rspamd_get_ticks(TRUE);
  298. diff = (ts2 - ts1);
  299. msg_notice("Added %hz elements in %.0f ticks (%.2f ticks per element)",
  300. nelts, diff, diff / (double) nelts);
  301. ts1 = rspamd_get_ticks(TRUE);
  302. for (lc = 0; lc < lookup_cycles && all_good; lc++) {
  303. for (i = 0; i < nelts / lookup_divisor; i++) {
  304. check = rspamd_random_uint64_fast() % nelts;
  305. if (btrie_lookup(btrie, addrs[check].addr64,
  306. sizeof(addrs[check].addr64) * 8) == NULL) {
  307. char ipbuf[INET6_ADDRSTRLEN + 1];
  308. all_good = FALSE;
  309. inet_ntop(AF_INET, (unsigned char *) &addrs[check].addr, ipbuf, sizeof(ipbuf));
  310. msg_notice("BAD btrie: {\"%s\", NULL, \"%ud\", 0, 0, 0, 0},",
  311. ipbuf,
  312. addrs[check].mask);
  313. all_good = FALSE;
  314. }
  315. }
  316. }
  317. g_assert(all_good);
  318. ts2 = rspamd_get_ticks(TRUE);
  319. diff = (ts2 - ts1);
  320. msg_notice("Checked %hz elements in %.0f ticks (%.2f ticks per lookup)",
  321. nelts * lookup_cycles / lookup_divisor, diff,
  322. diff / ((double) nelts * lookup_cycles / lookup_divisor));
  323. rspamd_mempool_delete(pool);
  324. g_free(addrs);
  325. }