You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

upstream.c 14KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521
  1. #ifdef _THREAD_SAFE
  2. #include <pthread.h>
  3. #endif
  4. #include <sys/types.h>
  5. #include <time.h>
  6. #include <stdlib.h>
  7. #include <stdio.h>
  8. #ifdef HAVE_STDINT_H
  9. #include <stdint.h>
  10. #endif
  11. #ifdef HAVE_INTTYPES_H
  12. #include <inttypes.h>
  13. #endif
  14. #include <limits.h>
  15. #ifdef WITH_DEBUG
  16. #include <syslog.h>
  17. #endif
  18. #include "upstream.h"
  19. #ifdef WITH_DEBUG
  20. #define msg_debug(args...) syslog(LOG_DEBUG, ##args)
  21. #else
  22. #define msg_debug(args...) do {} while(0)
  23. #endif
  24. #ifdef _THREAD_SAFE
  25. pthread_rwlock_t upstream_mtx = PTHREAD_RWLOCK_INITIALIZER;
  26. #define U_RLOCK() do { pthread_rwlock_rdlock (&upstream_mtx); } while (0)
  27. #define U_WLOCK() do { pthread_rwlock_wrlock (&upstream_mtx); } while (0)
  28. #define U_UNLOCK() do { pthread_rwlock_unlock (&upstream_mtx); } while (0)
  29. #else
  30. #define U_RLOCK() do {} while (0)
  31. #define U_WLOCK() do {} while (0)
  32. #define U_UNLOCK() do {} while (0)
  33. #endif
  34. #define MAX_TRIES 20
  35. /*
  36. * Poly: 0xedb88320
  37. * Init: 0x0
  38. */
  39. static const uint32_t crc32lookup[256] = {
  40. 0x00000000U, 0x77073096U, 0xee0e612cU, 0x990951baU, 0x076dc419U, 0x706af48fU,
  41. 0xe963a535U, 0x9e6495a3U, 0x0edb8832U, 0x79dcb8a4U, 0xe0d5e91eU, 0x97d2d988U,
  42. 0x09b64c2bU, 0x7eb17cbdU, 0xe7b82d07U, 0x90bf1d91U, 0x1db71064U, 0x6ab020f2U,
  43. 0xf3b97148U, 0x84be41deU, 0x1adad47dU, 0x6ddde4ebU, 0xf4d4b551U, 0x83d385c7U,
  44. 0x136c9856U, 0x646ba8c0U, 0xfd62f97aU, 0x8a65c9ecU, 0x14015c4fU, 0x63066cd9U,
  45. 0xfa0f3d63U, 0x8d080df5U, 0x3b6e20c8U, 0x4c69105eU, 0xd56041e4U, 0xa2677172U,
  46. 0x3c03e4d1U, 0x4b04d447U, 0xd20d85fdU, 0xa50ab56bU, 0x35b5a8faU, 0x42b2986cU,
  47. 0xdbbbc9d6U, 0xacbcf940U, 0x32d86ce3U, 0x45df5c75U, 0xdcd60dcfU, 0xabd13d59U,
  48. 0x26d930acU, 0x51de003aU, 0xc8d75180U, 0xbfd06116U, 0x21b4f4b5U, 0x56b3c423U,
  49. 0xcfba9599U, 0xb8bda50fU, 0x2802b89eU, 0x5f058808U, 0xc60cd9b2U, 0xb10be924U,
  50. 0x2f6f7c87U, 0x58684c11U, 0xc1611dabU, 0xb6662d3dU, 0x76dc4190U, 0x01db7106U,
  51. 0x98d220bcU, 0xefd5102aU, 0x71b18589U, 0x06b6b51fU, 0x9fbfe4a5U, 0xe8b8d433U,
  52. 0x7807c9a2U, 0x0f00f934U, 0x9609a88eU, 0xe10e9818U, 0x7f6a0dbbU, 0x086d3d2dU,
  53. 0x91646c97U, 0xe6635c01U, 0x6b6b51f4U, 0x1c6c6162U, 0x856530d8U, 0xf262004eU,
  54. 0x6c0695edU, 0x1b01a57bU, 0x8208f4c1U, 0xf50fc457U, 0x65b0d9c6U, 0x12b7e950U,
  55. 0x8bbeb8eaU, 0xfcb9887cU, 0x62dd1ddfU, 0x15da2d49U, 0x8cd37cf3U, 0xfbd44c65U,
  56. 0x4db26158U, 0x3ab551ceU, 0xa3bc0074U, 0xd4bb30e2U, 0x4adfa541U, 0x3dd895d7U,
  57. 0xa4d1c46dU, 0xd3d6f4fbU, 0x4369e96aU, 0x346ed9fcU, 0xad678846U, 0xda60b8d0U,
  58. 0x44042d73U, 0x33031de5U, 0xaa0a4c5fU, 0xdd0d7cc9U, 0x5005713cU, 0x270241aaU,
  59. 0xbe0b1010U, 0xc90c2086U, 0x5768b525U, 0x206f85b3U, 0xb966d409U, 0xce61e49fU,
  60. 0x5edef90eU, 0x29d9c998U, 0xb0d09822U, 0xc7d7a8b4U, 0x59b33d17U, 0x2eb40d81U,
  61. 0xb7bd5c3bU, 0xc0ba6cadU, 0xedb88320U, 0x9abfb3b6U, 0x03b6e20cU, 0x74b1d29aU,
  62. 0xead54739U, 0x9dd277afU, 0x04db2615U, 0x73dc1683U, 0xe3630b12U, 0x94643b84U,
  63. 0x0d6d6a3eU, 0x7a6a5aa8U, 0xe40ecf0bU, 0x9309ff9dU, 0x0a00ae27U, 0x7d079eb1U,
  64. 0xf00f9344U, 0x8708a3d2U, 0x1e01f268U, 0x6906c2feU, 0xf762575dU, 0x806567cbU,
  65. 0x196c3671U, 0x6e6b06e7U, 0xfed41b76U, 0x89d32be0U, 0x10da7a5aU, 0x67dd4accU,
  66. 0xf9b9df6fU, 0x8ebeeff9U, 0x17b7be43U, 0x60b08ed5U, 0xd6d6a3e8U, 0xa1d1937eU,
  67. 0x38d8c2c4U, 0x4fdff252U, 0xd1bb67f1U, 0xa6bc5767U, 0x3fb506ddU, 0x48b2364bU,
  68. 0xd80d2bdaU, 0xaf0a1b4cU, 0x36034af6U, 0x41047a60U, 0xdf60efc3U, 0xa867df55U,
  69. 0x316e8eefU, 0x4669be79U, 0xcb61b38cU, 0xbc66831aU, 0x256fd2a0U, 0x5268e236U,
  70. 0xcc0c7795U, 0xbb0b4703U, 0x220216b9U, 0x5505262fU, 0xc5ba3bbeU, 0xb2bd0b28U,
  71. 0x2bb45a92U, 0x5cb36a04U, 0xc2d7ffa7U, 0xb5d0cf31U, 0x2cd99e8bU, 0x5bdeae1dU,
  72. 0x9b64c2b0U, 0xec63f226U, 0x756aa39cU, 0x026d930aU, 0x9c0906a9U, 0xeb0e363fU,
  73. 0x72076785U, 0x05005713U, 0x95bf4a82U, 0xe2b87a14U, 0x7bb12baeU, 0x0cb61b38U,
  74. 0x92d28e9bU, 0xe5d5be0dU, 0x7cdcefb7U, 0x0bdbdf21U, 0x86d3d2d4U, 0xf1d4e242U,
  75. 0x68ddb3f8U, 0x1fda836eU, 0x81be16cdU, 0xf6b9265bU, 0x6fb077e1U, 0x18b74777U,
  76. 0x88085ae6U, 0xff0f6a70U, 0x66063bcaU, 0x11010b5cU, 0x8f659effU, 0xf862ae69U,
  77. 0x616bffd3U, 0x166ccf45U, 0xa00ae278U, 0xd70dd2eeU, 0x4e048354U, 0x3903b3c2U,
  78. 0xa7672661U, 0xd06016f7U, 0x4969474dU, 0x3e6e77dbU, 0xaed16a4aU, 0xd9d65adcU,
  79. 0x40df0b66U, 0x37d83bf0U, 0xa9bcae53U, 0xdebb9ec5U, 0x47b2cf7fU, 0x30b5ffe9U,
  80. 0xbdbdf21cU, 0xcabac28aU, 0x53b39330U, 0x24b4a3a6U, 0xbad03605U, 0xcdd70693U,
  81. 0x54de5729U, 0x23d967bfU, 0xb3667a2eU, 0xc4614ab8U, 0x5d681b02U, 0x2a6f2b94U,
  82. 0xb40bbe37U, 0xc30c8ea1U, 0x5a05df1bU, 0x2d02ef8dU
  83. };
  84. /*
  85. * Check upstream parameters and mark it whether valid or dead
  86. */
  87. static void
  88. check_upstream (struct upstream *up, time_t now, time_t error_timeout, time_t revive_timeout, size_t max_errors)
  89. {
  90. if (up->dead) {
  91. if (now - up->time >= revive_timeout) {
  92. msg_debug ("check_upstream: reviving upstream after %ld seconds", (long int) now - up->time);
  93. U_WLOCK ();
  94. up->dead = 0;
  95. up->errors = 0;
  96. up->time = 0;
  97. up->weight = up->priority;
  98. U_UNLOCK ();
  99. }
  100. }
  101. else {
  102. if (now - up->time >= error_timeout && up->errors >= max_errors) {
  103. msg_debug ("check_upstream: marking upstreams as dead after %ld errors", (long int) up->errors);
  104. U_WLOCK ();
  105. up->dead = 1;
  106. up->time = now;
  107. up->weight = 0;
  108. U_UNLOCK ();
  109. }
  110. }
  111. }
  112. /*
  113. * Call this function after failed upstream request
  114. */
  115. void
  116. upstream_fail (struct upstream *up, time_t now)
  117. {
  118. if (up->time != 0) {
  119. up->errors ++;
  120. }
  121. else {
  122. U_WLOCK ();
  123. up->time = now;
  124. up->errors ++;
  125. U_UNLOCK ();
  126. }
  127. }
  128. /*
  129. * Call this function after successfull upstream request
  130. */
  131. void
  132. upstream_ok (struct upstream *up, time_t now)
  133. {
  134. if (up->errors != 0) {
  135. U_WLOCK ();
  136. up->errors = 0;
  137. up->time = 0;
  138. U_UNLOCK ();
  139. }
  140. up->weight --;
  141. }
  142. /*
  143. * Mark all upstreams as active. This function is used when all upstreams are marked as inactive
  144. */
  145. void
  146. revive_all_upstreams (void *ups, size_t members, size_t msize)
  147. {
  148. int i;
  149. struct upstream *cur;
  150. u_char *p;
  151. U_WLOCK ();
  152. msg_debug ("revive_all_upstreams: starting reviving all upstreams");
  153. p = ups;
  154. for (i = 0; i < members; i++) {
  155. cur = (struct upstream *)p;
  156. cur->time = 0;
  157. cur->errors = 0;
  158. cur->dead = 0;
  159. cur->weight = cur->priority;
  160. p += msize;
  161. }
  162. U_UNLOCK ();
  163. }
  164. /*
  165. * Scan all upstreams for errors and mark upstreams dead or alive depends on conditions,
  166. * return number of alive upstreams
  167. */
  168. static int
  169. rescan_upstreams (void *ups, size_t members, size_t msize, time_t now, time_t error_timeout, time_t revive_timeout, size_t max_errors)
  170. {
  171. int i, alive;
  172. struct upstream *cur;
  173. u_char *p;
  174. /* Recheck all upstreams */
  175. p = ups;
  176. alive = members;
  177. for (i = 0; i < members; i++) {
  178. cur = (struct upstream *)p;
  179. check_upstream (cur, now, error_timeout, revive_timeout, max_errors);
  180. alive -= cur->dead;
  181. p += msize;
  182. }
  183. /* All upstreams are dead */
  184. if (alive == 0) {
  185. revive_all_upstreams (ups, members, msize);
  186. alive = members;
  187. }
  188. msg_debug ("rescan_upstreams: %d upstreams alive", alive);
  189. return alive;
  190. }
  191. /* Return alive upstream by its number */
  192. static struct upstream *
  193. get_upstream_by_number (void *ups, size_t members, size_t msize, int selected)
  194. {
  195. int i;
  196. u_char *p, *c;
  197. struct upstream *cur;
  198. i = 0;
  199. p = ups;
  200. c = ups;
  201. U_RLOCK ();
  202. for (;;) {
  203. /* Out of range, return NULL */
  204. if (p > c + members * msize) {
  205. break;
  206. }
  207. cur = (struct upstream *)p;
  208. p += msize;
  209. if (cur->dead) {
  210. /* Skip inactive upstreams */
  211. continue;
  212. }
  213. /* Return selected upstream */
  214. if (i == selected) {
  215. U_UNLOCK ();
  216. return cur;
  217. }
  218. i++;
  219. }
  220. U_UNLOCK ();
  221. /* Error */
  222. return NULL;
  223. }
  224. /*
  225. * Get hash key for specified key (perl hash)
  226. */
  227. static uint32_t
  228. get_hash_for_key (uint32_t hash, char *key, size_t keylen)
  229. {
  230. uint32_t h, index;
  231. const char *end = key + keylen;
  232. h = ~hash;
  233. while (key < end) {
  234. index = (h ^ (u_char) *key) & 0x000000ffU;
  235. h = (h >> 8) ^ crc32lookup[index];
  236. ++key;
  237. }
  238. return (~h);
  239. }
  240. /*
  241. * Recheck all upstreams and return random active upstream
  242. */
  243. struct upstream *
  244. get_random_upstream (void *ups, size_t members, size_t msize, time_t now, time_t error_timeout, time_t revive_timeout, size_t max_errors)
  245. {
  246. int alive, selected;
  247. alive = rescan_upstreams (ups, members, msize, now, error_timeout, revive_timeout, max_errors);
  248. selected = rand () % alive;
  249. msg_debug ("get_random_upstream: return upstream with number %d of %d", selected, alive);
  250. return get_upstream_by_number (ups, members, msize, selected);
  251. }
  252. /*
  253. * Return upstream by hash, that is calculated from active upstreams number
  254. */
  255. struct upstream *
  256. get_upstream_by_hash (void *ups, size_t members, size_t msize, time_t now,
  257. time_t error_timeout, time_t revive_timeout, size_t max_errors,
  258. char *key, size_t keylen)
  259. {
  260. int alive, tries = 0, r;
  261. uint32_t h = 0, ht;
  262. char *p, numbuf[4];
  263. struct upstream *cur;
  264. alive = rescan_upstreams (ups, members, msize, now, error_timeout, revive_timeout, max_errors);
  265. if (alive == 0) {
  266. return NULL;
  267. }
  268. h = get_hash_for_key (0, key, keylen);
  269. #ifdef HASH_COMPAT
  270. h = (h >> 16) & 0x7fff;
  271. #endif
  272. h %= members;
  273. msg_debug ("get_upstream_by_hash: try to select upstream number %d of %zd", h, members);
  274. for (;;) {
  275. p = (char *)ups + msize * h;
  276. cur = (struct upstream *)p;
  277. if (!cur->dead) {
  278. break;
  279. }
  280. r = snprintf (numbuf, sizeof (numbuf), "%d", tries);
  281. ht = get_hash_for_key (0, numbuf, r);
  282. ht = get_hash_for_key (ht, key, keylen);
  283. #ifdef HASH_COMPAT
  284. h += (ht >> 16) & 0x7fff;
  285. #else
  286. h += ht;
  287. #endif
  288. h %= members;
  289. msg_debug ("get_upstream_by_hash: try to select upstream number %d of %zd, tries: %d", h, members, tries);
  290. tries ++;
  291. if (tries > MAX_TRIES) {
  292. msg_debug ("get_upstream_by_hash: max tries exceed, returning NULL");
  293. return NULL;
  294. }
  295. }
  296. U_RLOCK ();
  297. p = ups;
  298. U_UNLOCK ();
  299. return cur;
  300. }
  301. /*
  302. * Recheck all upstreams and return upstream in round-robin order according to weight and priority
  303. */
  304. struct upstream *
  305. get_upstream_round_robin (void *ups, size_t members, size_t msize, time_t now, time_t error_timeout, time_t revive_timeout, size_t max_errors)
  306. {
  307. int alive, max_weight, i;
  308. struct upstream *cur, *selected = NULL;
  309. u_char *p;
  310. /* Recheck all upstreams */
  311. alive = rescan_upstreams (ups, members, msize, now, error_timeout, revive_timeout, max_errors);
  312. p = ups;
  313. max_weight = 0;
  314. selected = (struct upstream *)p;
  315. U_RLOCK ();
  316. for (i = 0; i < members; i++) {
  317. cur = (struct upstream *)p;
  318. if (!cur->dead) {
  319. if (max_weight < cur->weight) {
  320. max_weight = cur->weight;
  321. selected = cur;
  322. }
  323. }
  324. p += msize;
  325. }
  326. U_UNLOCK ();
  327. if (max_weight == 0) {
  328. p = ups;
  329. U_WLOCK ();
  330. for (i = 0; i < members; i++) {
  331. cur = (struct upstream *)p;
  332. cur->weight = cur->priority;
  333. if (!cur->dead) {
  334. if (max_weight < cur->priority) {
  335. max_weight = cur->priority;
  336. selected = cur;
  337. }
  338. }
  339. p += msize;
  340. }
  341. U_UNLOCK ();
  342. }
  343. msg_debug ("get_upstream_round_robin: selecting upstream with weight %d", max_weight);
  344. return selected;
  345. }
  346. /*
  347. * Recheck all upstreams and return upstream in round-robin order according to only priority (master-slaves)
  348. */
  349. struct upstream *
  350. get_upstream_master_slave (void *ups, size_t members, size_t msize, time_t now, time_t error_timeout, time_t revive_timeout, size_t max_errors)
  351. {
  352. int alive, max_weight, i;
  353. struct upstream *cur, *selected = NULL;
  354. u_char *p;
  355. /* Recheck all upstreams */
  356. alive = rescan_upstreams (ups, members, msize, now, error_timeout, revive_timeout, max_errors);
  357. p = ups;
  358. max_weight = 0;
  359. selected = (struct upstream *)p;
  360. U_RLOCK ();
  361. for (i = 0; i < members; i++) {
  362. cur = (struct upstream *)p;
  363. if (!cur->dead) {
  364. if (max_weight < cur->priority) {
  365. max_weight = cur->priority;
  366. selected = cur;
  367. }
  368. }
  369. p += msize;
  370. }
  371. U_UNLOCK ();
  372. msg_debug ("get_upstream_master_slave: selecting upstream with priority %d", max_weight);
  373. return selected;
  374. }
  375. /*
  376. * Ketama manipulation functions
  377. */
  378. static int
  379. ketama_sort_cmp (const void *a1, const void *a2)
  380. {
  381. return *((uint32_t *)a1) - *((uint32_t *)a2);
  382. }
  383. /*
  384. * Add ketama points for specified upstream
  385. */
  386. int
  387. upstream_ketama_add (struct upstream *up, char *up_key, size_t keylen, size_t keypoints)
  388. {
  389. uint32_t h = 0;
  390. char tmp[4];
  391. int i;
  392. /* Allocate ketama points array */
  393. if (up->ketama_points == NULL) {
  394. up->ketama_points_size = keypoints;
  395. up->ketama_points = malloc (sizeof (uint32_t) * up->ketama_points_size);
  396. if (up->ketama_points == NULL) {
  397. return -1;
  398. }
  399. }
  400. h = get_hash_for_key (h, up_key, keylen);
  401. for (i = 0; i < keypoints; i++) {
  402. tmp[0] = i & 0xff;
  403. tmp[1] = (i >> 8) & 0xff;
  404. tmp[2] = (i >> 16) & 0xff;
  405. tmp[3] = (i >> 24) & 0xff;
  406. h = get_hash_for_key (h, tmp, sizeof (tmp) * sizeof (char));
  407. up->ketama_points[i] = h;
  408. }
  409. /* Keep points sorted */
  410. qsort (up->ketama_points, keypoints, sizeof (uint32_t), ketama_sort_cmp);
  411. return 0;
  412. }
  413. /*
  414. * Return upstream by hash and find nearest ketama point in some server
  415. */
  416. struct upstream *
  417. get_upstream_by_hash_ketama (void *ups, size_t members, size_t msize, time_t now,
  418. time_t error_timeout, time_t revive_timeout, size_t max_errors,
  419. char *key, size_t keylen)
  420. {
  421. int alive, i;
  422. uint32_t h = 0, step, middle, d, min_diff = UINT_MAX;
  423. char *p;
  424. struct upstream *cur = NULL, *nearest = NULL;
  425. alive = rescan_upstreams (ups, members, msize, now, error_timeout, revive_timeout, max_errors);
  426. if (alive == 0) {
  427. return NULL;
  428. }
  429. h = get_hash_for_key (h, key, keylen);
  430. U_RLOCK ();
  431. p = ups;
  432. nearest = (struct upstream *)p;
  433. for (i = 0; i < members; i++) {
  434. cur = (struct upstream *)p;
  435. if (!cur->dead && cur->ketama_points != NULL) {
  436. /* Find nearest ketama point for this key */
  437. step = cur->ketama_points_size / 2;
  438. middle = step;
  439. while (step != 1) {
  440. d = cur->ketama_points[middle] - h;
  441. if (abs (d) < min_diff) {
  442. min_diff = abs (d);
  443. nearest = cur;
  444. }
  445. step /= 2;
  446. if (d > 0) {
  447. middle -= step;
  448. }
  449. else {
  450. middle += step;
  451. }
  452. }
  453. }
  454. }
  455. U_UNLOCK ();
  456. return nearest;
  457. }
  458. #undef U_LOCK
  459. #undef U_UNLOCK
  460. #undef msg_debug
  461. /*
  462. * vi:ts=4
  463. */