You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

map.c 21KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963
  1. /*-
  2. * Copyright 2016 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. /*
  17. * Implementation of map files handling
  18. */
  19. #include "config.h"
  20. #include "map.h"
  21. #include "http.h"
  22. #include "rspamd.h"
  23. #include "cryptobox.h"
  24. #include "unix-std.h"
  25. static const gchar *hash_fill = "1";
  26. /**
  27. * Data specific to file maps
  28. */
  29. struct file_map_data {
  30. const gchar *filename;
  31. struct stat st;
  32. };
  33. /**
  34. * Data specific to HTTP maps
  35. */
  36. struct http_map_data {
  37. struct addrinfo *addr;
  38. guint16 port;
  39. gchar *path;
  40. gchar *host;
  41. time_t last_checked;
  42. gboolean request_sent;
  43. struct rspamd_http_connection *conn;
  44. };
  45. struct http_callback_data {
  46. struct event_base *ev_base;
  47. struct timeval tv;
  48. struct rspamd_map *map;
  49. struct http_map_data *data;
  50. struct map_cb_data cbdata;
  51. GString *remain_buf;
  52. gint fd;
  53. };
  54. /* Value in seconds after which we would try to do stat on list file */
  55. /* HTTP timeouts */
  56. #define HTTP_CONNECT_TIMEOUT 2
  57. #define HTTP_READ_TIMEOUT 10
  58. /**
  59. * Helper for HTTP connection establishment
  60. */
  61. static gint
  62. connect_http (struct rspamd_map *map,
  63. struct http_map_data *data,
  64. gboolean is_async)
  65. {
  66. gint sock;
  67. rspamd_mempool_t *pool;
  68. pool = map->pool;
  69. if ((sock = rspamd_socket_tcp (data->addr, FALSE, is_async)) == -1) {
  70. msg_info_pool ("cannot connect to http server %s: %d, %s",
  71. data->host,
  72. errno,
  73. strerror (errno));
  74. return -1;
  75. }
  76. return sock;
  77. }
  78. /**
  79. * Write HTTP request
  80. */
  81. static void
  82. write_http_request (struct http_callback_data *cbd)
  83. {
  84. gchar datebuf[128];
  85. struct tm *tm;
  86. struct rspamd_http_message *msg;
  87. msg = rspamd_http_new_message (HTTP_REQUEST);
  88. msg->url = rspamd_fstring_new_init (cbd->data->path, strlen (cbd->data->path));
  89. if (cbd->data->last_checked != 0) {
  90. tm = gmtime (&cbd->data->last_checked);
  91. strftime (datebuf, sizeof (datebuf), "%a, %d %b %Y %H:%M:%S %Z", tm);
  92. rspamd_http_message_add_header (msg, "If-Modified-Since", datebuf);
  93. }
  94. rspamd_http_connection_write_message (cbd->data->conn, msg, cbd->data->host,
  95. NULL, cbd, cbd->fd, &cbd->tv, cbd->ev_base);
  96. }
  97. /**
  98. * Callback for destroying HTTP callback data
  99. */
  100. static void
  101. free_http_cbdata (struct http_callback_data *cbd)
  102. {
  103. g_atomic_int_set (cbd->map->locked, 0);
  104. if (cbd->remain_buf) {
  105. g_string_free (cbd->remain_buf, TRUE);
  106. }
  107. rspamd_http_connection_reset (cbd->data->conn);
  108. close (cbd->fd);
  109. g_slice_free1 (sizeof (struct http_callback_data), cbd);
  110. }
  111. /*
  112. * HTTP callbacks
  113. */
  114. static void
  115. http_map_error (struct rspamd_http_connection *conn,
  116. GError *err)
  117. {
  118. struct http_callback_data *cbd = conn->ud;
  119. rspamd_mempool_t *pool;
  120. pool = cbd->map->pool;
  121. msg_err_pool ("connection with http server terminated incorrectly: %s",
  122. err->message);
  123. free_http_cbdata (cbd);
  124. }
  125. static int
  126. http_map_finish (struct rspamd_http_connection *conn,
  127. struct rspamd_http_message *msg)
  128. {
  129. struct http_callback_data *cbd = conn->ud;
  130. struct rspamd_map *map;
  131. rspamd_mempool_t *pool;
  132. map = cbd->map;
  133. pool = cbd->map->pool;
  134. if (msg->code == 200) {
  135. if (cbd->remain_buf != NULL) {
  136. /* Append \n to avoid issues */
  137. g_string_append_c (cbd->remain_buf, '\n');
  138. map->read_callback (map->pool, cbd->remain_buf->str,
  139. cbd->remain_buf->len, &cbd->cbdata);
  140. }
  141. map->fin_callback (map->pool, &cbd->cbdata);
  142. *map->user_data = cbd->cbdata.cur_data;
  143. cbd->data->last_checked = msg->date;
  144. msg_info_pool ("read map data from %s", cbd->data->host);
  145. }
  146. else if (msg->code == 304) {
  147. msg_debug_pool ("data is not modified for server %s",
  148. cbd->data->host);
  149. cbd->data->last_checked = msg->date;
  150. }
  151. else {
  152. msg_info_pool ("cannot load map %s from %s: HTTP error %d",
  153. map->uri, cbd->data->host, msg->code);
  154. }
  155. free_http_cbdata (cbd);
  156. return 0;
  157. }
  158. static int
  159. http_map_read (struct rspamd_http_connection *conn,
  160. struct rspamd_http_message *msg,
  161. const gchar *chunk,
  162. gsize len)
  163. {
  164. struct http_callback_data *cbd = conn->ud;
  165. gchar *pos;
  166. struct rspamd_map *map;
  167. if (msg->code != 200 || len == 0) {
  168. /* Ignore not full replies */
  169. return 0;
  170. }
  171. map = cbd->map;
  172. if (cbd->remain_buf != NULL) {
  173. /* We need to concatenate incoming buf with the remaining buf */
  174. g_string_append_len (cbd->remain_buf, chunk, len);
  175. pos = map->read_callback (map->pool, cbd->remain_buf->str,
  176. cbd->remain_buf->len, &cbd->cbdata);
  177. /* All read */
  178. if (pos == NULL) {
  179. g_string_free (cbd->remain_buf, TRUE);
  180. cbd->remain_buf = NULL;
  181. }
  182. else {
  183. /* Need to erase data processed */
  184. g_string_erase (cbd->remain_buf, 0, pos - cbd->remain_buf->str);
  185. }
  186. }
  187. else {
  188. pos = map->read_callback (map->pool, (gchar *)chunk, len, &cbd->cbdata);
  189. if (pos != NULL) {
  190. /* Store data in remain buf */
  191. cbd->remain_buf = g_string_new_len (pos, len - (pos - chunk));
  192. }
  193. }
  194. return 0;
  195. }
  196. /**
  197. * Callback for reading data from file
  198. */
  199. static void
  200. read_map_file (struct rspamd_map *map, struct file_map_data *data)
  201. {
  202. struct map_cb_data cbdata;
  203. gchar buf[BUFSIZ], *remain = NULL;
  204. ssize_t r;
  205. gint fd, rlen, tlen;
  206. rspamd_mempool_t *pool = map->pool;
  207. if (map->read_callback == NULL || map->fin_callback == NULL) {
  208. msg_err_pool ("bad callback for reading map file");
  209. return;
  210. }
  211. if ((fd = open (data->filename, O_RDONLY)) == -1) {
  212. msg_warn_pool ("cannot open file '%s': %s", data->filename,
  213. strerror (errno));
  214. return;
  215. }
  216. cbdata.state = 0;
  217. cbdata.prev_data = *map->user_data;
  218. cbdata.cur_data = NULL;
  219. cbdata.map = map;
  220. rlen = 0;
  221. tlen = 0;
  222. while ((r = read (fd, buf + rlen, sizeof (buf) - rlen - 2)) > 0) {
  223. r += rlen;
  224. tlen += r;
  225. buf[r] = '\0';
  226. remain = map->read_callback (map->pool, buf, r, &cbdata);
  227. if (remain != NULL) {
  228. /* copy remaining buffer to start of buffer */
  229. rlen = r - (remain - buf);
  230. memmove (buf, remain, rlen);
  231. }
  232. else {
  233. rlen = 0;
  234. }
  235. }
  236. if (remain != NULL && remain > buf) {
  237. g_assert (rlen <= (gint)sizeof (buf) - 2);
  238. buf[rlen++] = '\n';
  239. buf[rlen] = '\0';
  240. tlen += rlen;
  241. map->read_callback (map->pool, buf, rlen, &cbdata);
  242. }
  243. close (fd);
  244. if (tlen > 0) {
  245. map->fin_callback (map->pool, &cbdata);
  246. *map->user_data = cbdata.cur_data;
  247. }
  248. }
  249. static void
  250. jitter_timeout_event (struct rspamd_map *map, gboolean locked, gboolean initial)
  251. {
  252. gdouble jittered_sec;
  253. gdouble timeout = initial ? 1.0 : map->cfg->map_timeout;
  254. /* Plan event again with jitter */
  255. evtimer_del (&map->ev);
  256. jittered_sec = rspamd_time_jitter (locked ? timeout * 4 : timeout, 0);
  257. double_to_tv (jittered_sec, &map->tv);
  258. evtimer_add (&map->ev, &map->tv);
  259. }
  260. /**
  261. * Common file callback
  262. */
  263. static void
  264. file_callback (gint fd, short what, void *ud)
  265. {
  266. struct rspamd_map *map = ud;
  267. struct file_map_data *data = map->map_data;
  268. struct stat st;
  269. rspamd_mempool_t *pool;
  270. pool = map->pool;
  271. if (g_atomic_int_get (map->locked)) {
  272. msg_info_pool (
  273. "don't try to reread map as it is locked by other process, will reread it later");
  274. jitter_timeout_event (map, TRUE, FALSE);
  275. return;
  276. }
  277. g_atomic_int_inc (map->locked);
  278. jitter_timeout_event (map, FALSE, FALSE);
  279. if (stat (data->filename,
  280. &st) != -1 &&
  281. (st.st_mtime > data->st.st_mtime || data->st.st_mtime == -1)) {
  282. /* File was modified since last check */
  283. memcpy (&data->st, &st, sizeof (struct stat));
  284. }
  285. else {
  286. g_atomic_int_set (map->locked, 0);
  287. return;
  288. }
  289. msg_info_pool ("rereading map file %s", data->filename);
  290. read_map_file (map, data);
  291. g_atomic_int_set (map->locked, 0);
  292. }
  293. /**
  294. * Async HTTP callback
  295. */
  296. static void
  297. http_callback (gint fd, short what, void *ud)
  298. {
  299. struct rspamd_map *map = ud;
  300. struct http_map_data *data;
  301. gint sock;
  302. struct http_callback_data *cbd;
  303. rspamd_mempool_t *pool;
  304. data = map->map_data;
  305. pool = map->pool;
  306. if (g_atomic_int_get (map->locked)) {
  307. msg_info_pool (
  308. "don't try to reread map as it is locked by other process, will reread it later");
  309. if (data->conn->ud == NULL) {
  310. jitter_timeout_event (map, TRUE, TRUE);
  311. }
  312. else {
  313. jitter_timeout_event (map, TRUE, FALSE);
  314. }
  315. return;
  316. }
  317. g_atomic_int_inc (map->locked);
  318. jitter_timeout_event (map, FALSE, FALSE);
  319. /* Connect asynced */
  320. if ((sock = connect_http (map, data, TRUE)) == -1) {
  321. g_atomic_int_set (map->locked, 0);
  322. return;
  323. }
  324. else {
  325. /* Plan event */
  326. cbd = g_slice_alloc (sizeof (struct http_callback_data));
  327. cbd->ev_base = map->ev_base;
  328. cbd->map = map;
  329. cbd->data = data;
  330. cbd->remain_buf = NULL;
  331. cbd->cbdata.state = 0;
  332. cbd->cbdata.prev_data = *cbd->map->user_data;
  333. cbd->cbdata.cur_data = NULL;
  334. cbd->cbdata.map = cbd->map;
  335. cbd->tv.tv_sec = HTTP_CONNECT_TIMEOUT;
  336. cbd->tv.tv_usec = 0;
  337. cbd->fd = sock;
  338. data->conn->ud = cbd;
  339. msg_debug_pool ("reading map data from %s", data->host);
  340. write_http_request (cbd);
  341. }
  342. }
  343. /* Start watching event for all maps */
  344. void
  345. rspamd_map_watch (struct rspamd_config *cfg, struct event_base *ev_base)
  346. {
  347. GList *cur = cfg->maps;
  348. struct rspamd_map *map;
  349. struct file_map_data *fdata;
  350. /* First of all do synced read of data */
  351. while (cur) {
  352. map = cur->data;
  353. map->ev_base = ev_base;
  354. event_base_set (map->ev_base, &map->ev);
  355. if (map->protocol == MAP_PROTO_FILE) {
  356. evtimer_set (&map->ev, file_callback, map);
  357. /* Read initial data */
  358. fdata = map->map_data;
  359. if (fdata->st.st_mtime != -1) {
  360. /* Do not try to read non-existent file */
  361. read_map_file (map, map->map_data);
  362. }
  363. /* Plan event with jitter */
  364. jitter_timeout_event (map, FALSE, TRUE);
  365. }
  366. else if (map->protocol == MAP_PROTO_HTTP) {
  367. evtimer_set (&map->ev, http_callback, map);
  368. jitter_timeout_event (map, FALSE, TRUE);
  369. }
  370. cur = g_list_next (cur);
  371. }
  372. }
  373. void
  374. rspamd_map_remove_all (struct rspamd_config *cfg)
  375. {
  376. g_list_free (cfg->maps);
  377. cfg->maps = NULL;
  378. if (cfg->map_pool != NULL) {
  379. rspamd_mempool_delete (cfg->map_pool);
  380. cfg->map_pool = NULL;
  381. }
  382. }
  383. gboolean
  384. rspamd_map_check_proto (const gchar *map_line, gint *res, const gchar **pos)
  385. {
  386. g_assert (res != NULL);
  387. g_assert (pos != NULL);
  388. if (g_ascii_strncasecmp (map_line, "http://",
  389. sizeof ("http://") - 1) == 0) {
  390. *res = MAP_PROTO_HTTP;
  391. *pos = map_line + sizeof ("http://") - 1;
  392. }
  393. else if (g_ascii_strncasecmp (map_line, "file://", sizeof ("file://") -
  394. 1) == 0) {
  395. *res = MAP_PROTO_FILE;
  396. *pos = map_line + sizeof ("file://") - 1;
  397. }
  398. else if (*map_line == '/') {
  399. /* Trivial file case */
  400. *res = MAP_PROTO_FILE;
  401. *pos = map_line;
  402. }
  403. else {
  404. msg_debug ("invalid map fetching protocol: %s", map_line);
  405. return FALSE;
  406. }
  407. return TRUE;
  408. }
  409. gboolean
  410. rspamd_map_add (struct rspamd_config *cfg,
  411. const gchar *map_line,
  412. const gchar *description,
  413. map_cb_t read_callback,
  414. map_fin_cb_t fin_callback,
  415. void **user_data)
  416. {
  417. struct rspamd_map *new_map;
  418. enum fetch_proto proto;
  419. const gchar *def, *p, *hostend;
  420. struct file_map_data *fdata;
  421. struct http_map_data *hdata;
  422. gchar portbuf[6], *cksum_encoded, cksum[rspamd_cryptobox_HASHBYTES];
  423. gint i, s, r;
  424. struct addrinfo hints, *res;
  425. rspamd_mempool_t *pool;
  426. /* First of all detect protocol line */
  427. if (!rspamd_map_check_proto (map_line, (int *)&proto, &def)) {
  428. return FALSE;
  429. }
  430. /* Constant pool */
  431. if (cfg->map_pool == NULL) {
  432. cfg->map_pool = rspamd_mempool_new (rspamd_mempool_suggest_size (),
  433. "map");
  434. memcpy (cfg->map_pool->tag.uid, cfg->cfg_pool->tag.uid,
  435. sizeof (cfg->map_pool->tag.uid));
  436. }
  437. new_map = rspamd_mempool_alloc0 (cfg->map_pool, sizeof (struct rspamd_map));
  438. new_map->read_callback = read_callback;
  439. new_map->fin_callback = fin_callback;
  440. new_map->user_data = user_data;
  441. new_map->protocol = proto;
  442. new_map->cfg = cfg;
  443. new_map->id = g_random_int ();
  444. new_map->locked =
  445. rspamd_mempool_alloc0_shared (cfg->cfg_pool, sizeof (gint));
  446. if (proto == MAP_PROTO_FILE) {
  447. new_map->uri = rspamd_mempool_strdup (cfg->cfg_pool, def);
  448. def = new_map->uri;
  449. }
  450. else {
  451. new_map->uri = rspamd_mempool_strdup (cfg->cfg_pool, map_line);
  452. }
  453. if (description != NULL) {
  454. new_map->description =
  455. rspamd_mempool_strdup (cfg->cfg_pool, description);
  456. }
  457. /* Now check for each proto separately */
  458. if (proto == MAP_PROTO_FILE) {
  459. fdata =
  460. rspamd_mempool_alloc0 (cfg->map_pool,
  461. sizeof (struct file_map_data));
  462. if (access (def, R_OK) == -1) {
  463. if (errno != ENOENT) {
  464. msg_err_config ("cannot open file '%s': %s", def, strerror
  465. (errno));
  466. return FALSE;
  467. }
  468. msg_info_config (
  469. "map '%s' is not found, but it can be loaded automatically later",
  470. def);
  471. /* We still can add this file */
  472. fdata->st.st_mtime = -1;
  473. }
  474. else {
  475. stat (def, &fdata->st);
  476. }
  477. fdata->filename = rspamd_mempool_strdup (cfg->map_pool, def);
  478. new_map->map_data = fdata;
  479. }
  480. else if (proto == MAP_PROTO_HTTP) {
  481. hdata =
  482. rspamd_mempool_alloc0 (cfg->map_pool,
  483. sizeof (struct http_map_data));
  484. /* Try to search port */
  485. if ((p = strchr (def, ':')) != NULL) {
  486. hostend = p;
  487. i = 0;
  488. p++;
  489. while (g_ascii_isdigit (*p) && i < (gint)sizeof (portbuf) - 1) {
  490. portbuf[i++] = *p++;
  491. }
  492. if (*p != '/') {
  493. msg_info_config ("bad http map definition: %s", def);
  494. return FALSE;
  495. }
  496. portbuf[i] = '\0';
  497. hdata->port = atoi (portbuf);
  498. }
  499. else {
  500. /* Default http port */
  501. rspamd_snprintf (portbuf, sizeof (portbuf), "80");
  502. hdata->port = 80;
  503. /* Now separate host from path */
  504. if ((p = strchr (def, '/')) == NULL) {
  505. msg_info_config ("bad http map definition: %s", def);
  506. return FALSE;
  507. }
  508. hostend = p;
  509. }
  510. hdata->host = rspamd_mempool_alloc (cfg->map_pool, hostend - def + 1);
  511. rspamd_strlcpy (hdata->host, def, hostend - def + 1);
  512. hdata->path = rspamd_mempool_strdup (cfg->map_pool, p);
  513. /* Now try to resolve */
  514. memset (&hints, 0, sizeof (hints));
  515. hints.ai_family = AF_UNSPEC; /* Allow IPv4 or IPv6 */
  516. hints.ai_socktype = SOCK_STREAM; /* Stream socket */
  517. hints.ai_flags = 0;
  518. hints.ai_protocol = 0; /* Any protocol */
  519. hints.ai_canonname = NULL;
  520. hints.ai_addr = NULL;
  521. hints.ai_next = NULL;
  522. if ((r = getaddrinfo (hdata->host, portbuf, &hints, &res)) == 0) {
  523. hdata->addr = res;
  524. rspamd_mempool_add_destructor (cfg->cfg_pool,
  525. (rspamd_mempool_destruct_t)freeaddrinfo, hdata->addr);
  526. }
  527. else {
  528. msg_err_config ("address resolution for %s failed: %s",
  529. hdata->host,
  530. gai_strerror (r));
  531. return FALSE;
  532. }
  533. /* Now try to connect */
  534. if ((s = rspamd_socket_tcp (hdata->addr, FALSE, FALSE)) == -1) {
  535. msg_info_config ("cannot connect to http server %s: %d, %s",
  536. hdata->host,
  537. errno,
  538. strerror (errno));
  539. return FALSE;
  540. }
  541. close (s);
  542. hdata->conn = rspamd_http_connection_new (http_map_read, http_map_error,
  543. http_map_finish,
  544. RSPAMD_HTTP_BODY_PARTIAL | RSPAMD_HTTP_CLIENT_SIMPLE,
  545. RSPAMD_HTTP_CLIENT, NULL);
  546. new_map->map_data = hdata;
  547. }
  548. /* Temp pool */
  549. rspamd_cryptobox_hash (cksum, new_map->uri, strlen (new_map->uri), NULL, 0);
  550. cksum_encoded = rspamd_encode_base32 (cksum, sizeof (cksum));
  551. new_map->pool = rspamd_mempool_new (rspamd_mempool_suggest_size (), "map");
  552. memcpy (new_map->pool->tag.uid, cksum_encoded,
  553. sizeof (new_map->pool->tag.uid));
  554. g_free (cksum_encoded);
  555. pool = new_map->pool;
  556. msg_info_pool ("added map %s", new_map->uri);
  557. cfg->maps = g_list_prepend (cfg->maps, new_map);
  558. return TRUE;
  559. }
  560. static gchar*
  561. strip_map_elt (rspamd_mempool_t *pool, const gchar *start,
  562. size_t len)
  563. {
  564. gchar *res = NULL;
  565. const gchar *c = start, *p = start + len - 1;
  566. /* Strip starting spaces */
  567. while (g_ascii_isspace (*c)) {
  568. c ++;
  569. }
  570. /* Strip ending spaces */
  571. while (g_ascii_isspace (*p) && p >= c) {
  572. p --;
  573. }
  574. /* One symbol up */
  575. p ++;
  576. if (p - c > 0) {
  577. res = rspamd_mempool_alloc (pool, p - c + 1);
  578. rspamd_strlcpy (res, c, p - c + 1);
  579. }
  580. return res;
  581. }
  582. /**
  583. * FSM for parsing lists
  584. */
  585. gchar *
  586. abstract_parse_kv_list (rspamd_mempool_t * pool,
  587. gchar * chunk,
  588. gint len,
  589. struct map_cb_data *data,
  590. insert_func func)
  591. {
  592. gchar *c, *p, *key = NULL, *value = NULL, *end;
  593. p = chunk;
  594. c = p;
  595. end = p + len;
  596. while (p < end) {
  597. switch (data->state) {
  598. case 0:
  599. /* read key */
  600. /* Check here comments, eol and end of buffer */
  601. if (*p == '#') {
  602. if (key != NULL && p - c >= 0) {
  603. value = rspamd_mempool_alloc (pool, p - c + 1);
  604. memcpy (value, c, p - c);
  605. value[p - c] = '\0';
  606. value = g_strstrip (value);
  607. func (data->cur_data, key, value);
  608. msg_debug_pool ("insert kv pair: %s -> %s", key, value);
  609. }
  610. data->state = 99;
  611. }
  612. else if (*p == '\r' || *p == '\n') {
  613. if (key != NULL && p - c >= 0) {
  614. value = rspamd_mempool_alloc (pool, p - c + 1);
  615. memcpy (value, c, p - c);
  616. value[p - c] = '\0';
  617. value = g_strstrip (value);
  618. func (data->cur_data, key, value);
  619. msg_debug_pool ("insert kv pair: %s -> %s", key, value);
  620. }
  621. else if (key == NULL && p - c > 0) {
  622. /* Key only line */
  623. key = rspamd_mempool_alloc (pool, p - c + 1);
  624. memcpy (key, c, p - c);
  625. key[p - c] = '\0';
  626. value = rspamd_mempool_alloc (pool, 1);
  627. *value = '\0';
  628. func (data->cur_data, key, value);
  629. msg_debug_pool ("insert kv pair: %s -> %s", key, value);
  630. }
  631. data->state = 100;
  632. key = NULL;
  633. }
  634. else if (g_ascii_isspace (*p)) {
  635. if (p - c > 0) {
  636. key = rspamd_mempool_alloc (pool, p - c + 1);
  637. memcpy (key, c, p - c);
  638. key[p - c] = '\0';
  639. data->state = 2;
  640. }
  641. else {
  642. key = NULL;
  643. }
  644. }
  645. else {
  646. p++;
  647. }
  648. break;
  649. case 2:
  650. /* Skip spaces before value */
  651. if (!g_ascii_isspace (*p)) {
  652. c = p;
  653. data->state = 0;
  654. }
  655. else {
  656. p++;
  657. }
  658. break;
  659. case 99:
  660. /* SKIP_COMMENT */
  661. /* Skip comment till end of line */
  662. if (*p == '\r' || *p == '\n') {
  663. while ((*p == '\r' || *p == '\n') && p < end) {
  664. p++;
  665. }
  666. c = p;
  667. key = NULL;
  668. data->state = 0;
  669. }
  670. else {
  671. p++;
  672. }
  673. break;
  674. case 100:
  675. /* Skip \r\n and whitespaces */
  676. if (*p == '\r' || *p == '\n' || g_ascii_isspace (*p)) {
  677. p++;
  678. }
  679. else {
  680. c = p;
  681. key = NULL;
  682. data->state = 0;
  683. }
  684. break;
  685. }
  686. }
  687. return c;
  688. }
  689. gchar *
  690. rspamd_parse_abstract_list (rspamd_mempool_t * pool,
  691. gchar * chunk,
  692. gint len,
  693. struct map_cb_data *data,
  694. insert_func func)
  695. {
  696. gchar *p, *c, *end, *s;
  697. p = chunk;
  698. c = p;
  699. end = p + len;
  700. while (p < end) {
  701. switch (data->state) {
  702. /* READ_SYMBOL */
  703. case 0:
  704. if (*p == '#') {
  705. /* Got comment */
  706. if (p > c) {
  707. /* Save previous string in lines like: "127.0.0.1 #localhost" */
  708. s = strip_map_elt (pool, c, p - c);
  709. if (s) {
  710. func (data->cur_data, s, hash_fill);
  711. msg_debug_pool ("insert element (before comment): %s", s);
  712. }
  713. }
  714. c = p;
  715. data->state = 1;
  716. }
  717. else if (*p == '\r' || *p == '\n') {
  718. /* Got EOL marker, save stored string */
  719. s = strip_map_elt (pool, c, p - c);
  720. if (s) {
  721. func (data->cur_data, s, hash_fill);
  722. msg_debug_pool ("insert element (before EOL): %s", s);
  723. }
  724. /* Skip EOL symbols */
  725. while ((*p == '\r' || *p == '\n') && p < end) {
  726. p++;
  727. }
  728. if (p == end) {
  729. p ++;
  730. c = NULL;
  731. }
  732. else {
  733. c = p;
  734. }
  735. }
  736. else {
  737. p++;
  738. }
  739. break;
  740. /* SKIP_COMMENT */
  741. case 1:
  742. /* Skip comment till end of line */
  743. if (*p == '\r' || *p == '\n') {
  744. while ((*p == '\r' || *p == '\n') && p < end) {
  745. p++;
  746. }
  747. if (p == end) {
  748. p ++;
  749. c = NULL;
  750. }
  751. else {
  752. c = p;
  753. }
  754. data->state = 0;
  755. }
  756. else {
  757. p++;
  758. }
  759. break;
  760. }
  761. }
  762. if (c >= end) {
  763. c = NULL;
  764. }
  765. return c;
  766. }
  767. /**
  768. * Radix tree helper function
  769. */
  770. static void
  771. radix_tree_insert_helper (gpointer st, gconstpointer key, gpointer value)
  772. {
  773. radix_compressed_t *tree = (radix_compressed_t *)st;
  774. rspamd_radix_add_iplist ((gchar *)key, " ,;", tree);
  775. }
  776. /* Helpers */
  777. gchar *
  778. rspamd_hosts_read (rspamd_mempool_t * pool,
  779. gchar * chunk,
  780. gint len,
  781. struct map_cb_data *data)
  782. {
  783. if (data->cur_data == NULL) {
  784. data->cur_data = g_hash_table_new (rspamd_strcase_hash,
  785. rspamd_strcase_equal);
  786. }
  787. return rspamd_parse_abstract_list (pool,
  788. chunk,
  789. len,
  790. data,
  791. (insert_func) g_hash_table_insert);
  792. }
  793. void
  794. rspamd_hosts_fin (rspamd_mempool_t * pool, struct map_cb_data *data)
  795. {
  796. if (data->prev_data) {
  797. g_hash_table_destroy (data->prev_data);
  798. }
  799. if (data->cur_data) {
  800. msg_info_pool ("read hash of %d elements", g_hash_table_size
  801. (data->cur_data));
  802. }
  803. }
  804. gchar *
  805. rspamd_kv_list_read (rspamd_mempool_t * pool,
  806. gchar * chunk,
  807. gint len,
  808. struct map_cb_data *data)
  809. {
  810. if (data->cur_data == NULL) {
  811. data->cur_data = g_hash_table_new (rspamd_strcase_hash,
  812. rspamd_strcase_equal);
  813. }
  814. return abstract_parse_kv_list (pool,
  815. chunk,
  816. len,
  817. data,
  818. (insert_func) g_hash_table_insert);
  819. }
  820. void
  821. rspamd_kv_list_fin (rspamd_mempool_t * pool, struct map_cb_data *data)
  822. {
  823. if (data->prev_data) {
  824. g_hash_table_destroy (data->prev_data);
  825. }
  826. if (data->cur_data) {
  827. msg_info_pool ("read hash of %d elements", g_hash_table_size
  828. (data->cur_data));
  829. }
  830. }
  831. gchar *
  832. rspamd_radix_read (rspamd_mempool_t * pool,
  833. gchar * chunk,
  834. gint len,
  835. struct map_cb_data *data)
  836. {
  837. radix_compressed_t *tree;
  838. rspamd_mempool_t *rpool;
  839. if (data->cur_data == NULL) {
  840. tree = radix_create_compressed ();
  841. rpool = radix_get_pool (tree);
  842. memcpy (rpool->tag.uid, pool->tag.uid, sizeof (rpool->tag.uid));
  843. data->cur_data = tree;
  844. }
  845. return rspamd_parse_abstract_list (pool,
  846. chunk,
  847. len,
  848. data,
  849. (insert_func) radix_tree_insert_helper);
  850. }
  851. void
  852. rspamd_radix_fin (rspamd_mempool_t * pool, struct map_cb_data *data)
  853. {
  854. if (data->prev_data) {
  855. radix_destroy_compressed (data->prev_data);
  856. }
  857. if (data->cur_data) {
  858. msg_info_pool ("read radix trie of %z elements", radix_get_size
  859. (data->cur_data));
  860. }
  861. }