You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

mmaped_file.c 26KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113
  1. /*-
  2. * Copyright 2016 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "config.h"
  17. #include "stat_internal.h"
  18. #include "unix-std.h"
  /* Upper bound on consecutive blocks probed for one hash before giving up */
  #define CHAIN_LENGTH 128

  /* Section types */
  #define STATFILE_SECTION_COMMON 1
  22. /**
  23. * Common statfile header
  24. */
  25. struct stat_file_header {
  26. u_char magic[3]; /**< magic signature ('r' 's' 'd') */
  27. u_char version[2]; /**< version of statfile */
  28. u_char padding[3]; /**< padding */
  29. uint64_t create_time; /**< create time (time_t->uint64_t) */
  30. uint64_t revision; /**< revision number */
  31. uint64_t rev_time; /**< revision time */
  32. uint64_t used_blocks; /**< used blocks number */
  33. uint64_t total_blocks; /**< total number of blocks */
  34. uint64_t tokenizer_conf_len; /**< length of tokenizer configuration */
  35. u_char unused[231]; /**< some bytes that can be used in future */
  36. };
  /**
   * Header that precedes the blocks of one section
   */
  struct stat_file_section {
      uint64_t code;   /**< section type code */
      uint64_t length; /**< number of blocks in the section */
  };
  /**
   * One data cell of a statfile: a pair of hashes and the value bound to them
   */
  struct stat_file_block {
      uint32_t hash1; /**< first hash; also selects the starting slot */
      uint32_t hash2; /**< second hash */
      double value;   /**< stored value */
  };
  52. /**
  53. * Statistic file
  54. */
  55. struct stat_file {
  56. struct stat_file_header header; /**< header */
  57. struct stat_file_section section; /**< first section */
  58. struct stat_file_block blocks[1]; /**< first block of data */
  59. };
  60. /**
  61. * Common view of statfile object
  62. */
  63. typedef struct {
  64. #ifdef HAVE_PATH_MAX
  65. gchar filename[PATH_MAX]; /**< name of file */
  66. #else
  67. gchar filename[MAXPATHLEN]; /**< name of file */
  68. #endif
  69. rspamd_mempool_t *pool;
  70. gint fd; /**< descriptor */
  71. void *map; /**< mmaped area */
  72. off_t seek_pos; /**< current seek position */
  73. struct stat_file_section cur_section; /**< current section */
  74. size_t len; /**< length of file(in bytes) */
  75. struct rspamd_statfile_config *cf;
  76. } rspamd_mmaped_file_t;
  77. #define RSPAMD_STATFILE_VERSION \
  78. { \
  79. '1', '2' \
  80. }
  81. #define BACKUP_SUFFIX ".old"
  82. static void rspamd_mmaped_file_set_block_common(rspamd_mempool_t *pool,
  83. rspamd_mmaped_file_t *file,
  84. uint32_t h1, uint32_t h2, double value);
  85. rspamd_mmaped_file_t *rspamd_mmaped_file_open(rspamd_mempool_t *pool,
  86. const gchar *filename, size_t size,
  87. struct rspamd_statfile_config *stcf);
  88. gint rspamd_mmaped_file_create(const gchar *filename, size_t size,
  89. struct rspamd_statfile_config *stcf,
  90. rspamd_mempool_t *pool);
  91. gint rspamd_mmaped_file_close_file(rspamd_mempool_t *pool,
  92. rspamd_mmaped_file_t *file);
  93. double
  94. rspamd_mmaped_file_get_block(rspamd_mmaped_file_t *file,
  95. uint32_t h1,
  96. uint32_t h2)
  97. {
  98. struct stat_file_block *block;
  99. guint i, blocknum;
  100. u_char *c;
  101. if (!file->map) {
  102. return 0;
  103. }
  104. blocknum = h1 % file->cur_section.length;
  105. c = (u_char *) file->map + file->seek_pos + blocknum * sizeof(struct stat_file_block);
  106. block = (struct stat_file_block *) c;
  107. for (i = 0; i < CHAIN_LENGTH; i++) {
  108. if (i + blocknum >= file->cur_section.length) {
  109. break;
  110. }
  111. if (block->hash1 == h1 && block->hash2 == h2) {
  112. return block->value;
  113. }
  114. c += sizeof(struct stat_file_block);
  115. block = (struct stat_file_block *) c;
  116. }
  117. return 0;
  118. }
  119. static void
  120. rspamd_mmaped_file_set_block_common(rspamd_mempool_t *pool,
  121. rspamd_mmaped_file_t *file,
  122. uint32_t h1, uint32_t h2, double value)
  123. {
  124. struct stat_file_block *block, *to_expire = NULL;
  125. struct stat_file_header *header;
  126. guint i, blocknum;
  127. u_char *c;
  128. double min = G_MAXDOUBLE;
  129. if (!file->map) {
  130. return;
  131. }
  132. blocknum = h1 % file->cur_section.length;
  133. header = (struct stat_file_header *) file->map;
  134. c = (u_char *) file->map + file->seek_pos + blocknum * sizeof(struct stat_file_block);
  135. block = (struct stat_file_block *) c;
  136. for (i = 0; i < CHAIN_LENGTH; i++) {
  137. if (i + blocknum >= file->cur_section.length) {
  138. /* Need to expire some block in chain */
  139. msg_info_pool("chain %ud is full in statfile %s, starting expire",
  140. blocknum,
  141. file->filename);
  142. break;
  143. }
  144. /* First try to find block in chain */
  145. if (block->hash1 == h1 && block->hash2 == h2) {
  146. msg_debug_pool("%s found existing block %ud in chain %ud, value %.2f",
  147. file->filename,
  148. i,
  149. blocknum,
  150. value);
  151. block->value = value;
  152. return;
  153. }
  154. /* Check whether we have a free block in chain */
  155. if (block->hash1 == 0 && block->hash2 == 0) {
  156. /* Write new block here */
  157. msg_debug_pool("%s found free block %ud in chain %ud, set h1=%ud, h2=%ud",
  158. file->filename,
  159. i,
  160. blocknum,
  161. h1,
  162. h2);
  163. block->hash1 = h1;
  164. block->hash2 = h2;
  165. block->value = value;
  166. header->used_blocks++;
  167. return;
  168. }
  169. /* Expire block with minimum value otherwise */
  170. if (block->value < min) {
  171. to_expire = block;
  172. min = block->value;
  173. }
  174. c += sizeof(struct stat_file_block);
  175. block = (struct stat_file_block *) c;
  176. }
  177. /* Try expire some block */
  178. if (to_expire) {
  179. block = to_expire;
  180. }
  181. else {
  182. /* Expire first block in chain */
  183. c = (u_char *) file->map + file->seek_pos + blocknum * sizeof(struct stat_file_block);
  184. block = (struct stat_file_block *) c;
  185. }
  186. block->hash1 = h1;
  187. block->hash2 = h2;
  188. block->value = value;
  189. }
  190. void rspamd_mmaped_file_set_block(rspamd_mempool_t *pool,
  191. rspamd_mmaped_file_t *file,
  192. uint32_t h1,
  193. uint32_t h2,
  194. double value)
  195. {
  196. rspamd_mmaped_file_set_block_common(pool, file, h1, h2, value);
  197. }
  198. gboolean
  199. rspamd_mmaped_file_set_revision(rspamd_mmaped_file_t *file, uint64_t rev, time_t time)
  200. {
  201. struct stat_file_header *header;
  202. if (file == NULL || file->map == NULL) {
  203. return FALSE;
  204. }
  205. header = (struct stat_file_header *) file->map;
  206. header->revision = rev;
  207. header->rev_time = time;
  208. return TRUE;
  209. }
  210. gboolean
  211. rspamd_mmaped_file_inc_revision(rspamd_mmaped_file_t *file)
  212. {
  213. struct stat_file_header *header;
  214. if (file == NULL || file->map == NULL) {
  215. return FALSE;
  216. }
  217. header = (struct stat_file_header *) file->map;
  218. header->revision++;
  219. return TRUE;
  220. }
  221. gboolean
  222. rspamd_mmaped_file_dec_revision(rspamd_mmaped_file_t *file)
  223. {
  224. struct stat_file_header *header;
  225. if (file == NULL || file->map == NULL) {
  226. return FALSE;
  227. }
  228. header = (struct stat_file_header *) file->map;
  229. header->revision--;
  230. return TRUE;
  231. }
  232. gboolean
  233. rspamd_mmaped_file_get_revision(rspamd_mmaped_file_t *file, uint64_t *rev, time_t *time)
  234. {
  235. struct stat_file_header *header;
  236. if (file == NULL || file->map == NULL) {
  237. return FALSE;
  238. }
  239. header = (struct stat_file_header *) file->map;
  240. if (rev != NULL) {
  241. *rev = header->revision;
  242. }
  243. if (time != NULL) {
  244. *time = header->rev_time;
  245. }
  246. return TRUE;
  247. }
  248. uint64_t
  249. rspamd_mmaped_file_get_used(rspamd_mmaped_file_t *file)
  250. {
  251. struct stat_file_header *header;
  252. if (file == NULL || file->map == NULL) {
  253. return (uint64_t) -1;
  254. }
  255. header = (struct stat_file_header *) file->map;
  256. return header->used_blocks;
  257. }
  258. uint64_t
  259. rspamd_mmaped_file_get_total(rspamd_mmaped_file_t *file)
  260. {
  261. struct stat_file_header *header;
  262. if (file == NULL || file->map == NULL) {
  263. return (uint64_t) -1;
  264. }
  265. header = (struct stat_file_header *) file->map;
  266. /* If total blocks is 0 we have old version of header, so set total blocks correctly */
  267. if (header->total_blocks == 0) {
  268. header->total_blocks = file->cur_section.length;
  269. }
  270. return header->total_blocks;
  271. }
  272. /* Check whether specified file is statistic file and calculate its len in blocks */
  273. static gint
  274. rspamd_mmaped_file_check(rspamd_mempool_t *pool, rspamd_mmaped_file_t *file)
  275. {
  276. struct stat_file *f;
  277. gchar *c;
  278. static gchar valid_version[] = RSPAMD_STATFILE_VERSION;
  279. if (!file || !file->map) {
  280. return -1;
  281. }
  282. if (file->len < sizeof(struct stat_file)) {
  283. msg_info_pool("file %s is too short to be stat file: %z",
  284. file->filename,
  285. file->len);
  286. return -1;
  287. }
  288. f = (struct stat_file *) file->map;
  289. c = &f->header.magic[0];
  290. /* Check magic and version */
  291. if (*c++ != 'r' || *c++ != 's' || *c++ != 'd') {
  292. msg_info_pool("file %s is invalid stat file", file->filename);
  293. return -1;
  294. }
  295. c = &f->header.version[0];
  296. /* Now check version and convert old version to new one (that can be used for sync */
  297. if (*c == 1 && *(c + 1) == 0) {
  298. return -1;
  299. }
  300. else if (memcmp(c, valid_version, sizeof(valid_version)) != 0) {
  301. /* Unknown version */
  302. msg_info_pool("file %s has invalid version %c.%c",
  303. file->filename,
  304. '0' + *c,
  305. '0' + *(c + 1));
  306. return -1;
  307. }
  308. /* Check first section and set new offset */
  309. file->cur_section.code = f->section.code;
  310. file->cur_section.length = f->section.length;
  311. if (file->cur_section.length * sizeof(struct stat_file_block) >
  312. file->len) {
  313. msg_info_pool("file %s is truncated: %z, must be %z",
  314. file->filename,
  315. file->len,
  316. file->cur_section.length * sizeof(struct stat_file_block));
  317. return -1;
  318. }
  319. file->seek_pos = sizeof(struct stat_file) -
  320. sizeof(struct stat_file_block);
  321. return 0;
  322. }
  323. static rspamd_mmaped_file_t *
  324. rspamd_mmaped_file_reindex(rspamd_mempool_t *pool,
  325. const gchar *filename,
  326. size_t old_size,
  327. size_t size,
  328. struct rspamd_statfile_config *stcf)
  329. {
  330. gchar *backup, *lock;
  331. gint fd, lock_fd;
  332. rspamd_mmaped_file_t *new, *old = NULL;
  333. u_char *map, *pos;
  334. struct stat_file_block *block;
  335. struct stat_file_header *header, *nh;
  336. struct timespec sleep_ts = {
  337. .tv_sec = 0,
  338. .tv_nsec = 1000000};
  339. if (size <
  340. sizeof(struct stat_file_header) + sizeof(struct stat_file_section) +
  341. sizeof(block)) {
  342. msg_err_pool("file %s is too small to carry any statistic: %z",
  343. filename,
  344. size);
  345. return NULL;
  346. }
  347. lock = g_strconcat(filename, ".lock", NULL);
  348. lock_fd = open(lock, O_WRONLY | O_CREAT | O_EXCL, 00600);
  349. while (lock_fd == -1) {
  350. /* Wait for lock */
  351. lock_fd = open(lock, O_WRONLY | O_CREAT | O_EXCL, 00600);
  352. if (lock_fd != -1) {
  353. unlink(lock);
  354. close(lock_fd);
  355. g_free(lock);
  356. return rspamd_mmaped_file_open(pool, filename, size, stcf);
  357. }
  358. else {
  359. nanosleep(&sleep_ts, NULL);
  360. }
  361. }
  362. backup = g_strconcat(filename, ".old", NULL);
  363. if (rename(filename, backup) == -1) {
  364. msg_err_pool("cannot rename %s to %s: %s", filename, backup, strerror(errno));
  365. g_free(backup);
  366. unlink(lock);
  367. g_free(lock);
  368. close(lock_fd);
  369. return NULL;
  370. }
  371. old = rspamd_mmaped_file_open(pool, backup, old_size, stcf);
  372. if (old == NULL) {
  373. msg_warn_pool("old file %s is invalid mmapped file, just move it",
  374. backup);
  375. }
  376. /* We need to release our lock here */
  377. unlink(lock);
  378. close(lock_fd);
  379. g_free(lock);
  380. /* Now create new file with required size */
  381. if (rspamd_mmaped_file_create(filename, size, stcf, pool) != 0) {
  382. msg_err_pool("cannot create new file");
  383. rspamd_mmaped_file_close(old);
  384. g_free(backup);
  385. return NULL;
  386. }
  387. new = rspamd_mmaped_file_open(pool, filename, size, stcf);
  388. if (old) {
  389. /* Now open new file and start copying */
  390. fd = open(backup, O_RDONLY);
  391. if (fd == -1 || new == NULL) {
  392. if (fd != -1) {
  393. close(fd);
  394. }
  395. msg_err_pool("cannot open file: %s", strerror(errno));
  396. rspamd_mmaped_file_close(old);
  397. g_free(backup);
  398. return NULL;
  399. }
  400. /* Now start reading blocks from old statfile */
  401. if ((map =
  402. mmap(NULL, old_size, PROT_READ, MAP_SHARED, fd, 0)) == MAP_FAILED) {
  403. msg_err_pool("cannot mmap file: %s", strerror(errno));
  404. close(fd);
  405. rspamd_mmaped_file_close(old);
  406. g_free(backup);
  407. return NULL;
  408. }
  409. pos = map + (sizeof(struct stat_file) - sizeof(struct stat_file_block));
  410. if (pos - map < (gssize) old_size) {
  411. while ((gssize) old_size - (pos - map) >= (gssize) sizeof(struct stat_file_block)) {
  412. block = (struct stat_file_block *) pos;
  413. if (block->hash1 != 0 && block->value != 0) {
  414. rspamd_mmaped_file_set_block_common(pool,
  415. new, block->hash1,
  416. block->hash2, block->value);
  417. }
  418. pos += sizeof(block);
  419. }
  420. }
  421. header = (struct stat_file_header *) map;
  422. rspamd_mmaped_file_set_revision(new, header->revision, header->rev_time);
  423. nh = new->map;
  424. /* Copy tokenizer configuration */
  425. memcpy(nh->unused, header->unused, sizeof(header->unused));
  426. nh->tokenizer_conf_len = header->tokenizer_conf_len;
  427. munmap(map, old_size);
  428. close(fd);
  429. rspamd_mmaped_file_close_file(pool, old);
  430. }
  431. unlink(backup);
  432. g_free(backup);
  433. return new;
  434. }
  435. /*
  436. * Pre-load mmaped file into memory
  437. */
  438. static void
  439. rspamd_mmaped_file_preload(rspamd_mmaped_file_t *file)
  440. {
  441. guint8 *pos, *end;
  442. volatile guint8 t;
  443. gsize size;
  444. pos = (guint8 *) file->map;
  445. end = (guint8 *) file->map + file->len;
  446. if (madvise(pos, end - pos, MADV_SEQUENTIAL) == -1) {
  447. msg_info("madvise failed: %s", strerror(errno));
  448. }
  449. else {
  450. /* Load pages of file */
  451. #ifdef HAVE_GETPAGESIZE
  452. size = getpagesize();
  453. #else
  454. size = sysconf(_SC_PAGESIZE);
  455. #endif
  456. while (pos < end) {
  457. t = *pos;
  458. (void) t;
  459. pos += size;
  460. }
  461. }
  462. }
  463. rspamd_mmaped_file_t *
  464. rspamd_mmaped_file_open(rspamd_mempool_t *pool,
  465. const gchar *filename, size_t size,
  466. struct rspamd_statfile_config *stcf)
  467. {
  468. struct stat st;
  469. rspamd_mmaped_file_t *new_file;
  470. gchar *lock;
  471. gint lock_fd;
  472. lock = g_strconcat(filename, ".lock", NULL);
  473. lock_fd = open(lock, O_WRONLY | O_CREAT | O_EXCL, 00600);
  474. if (lock_fd == -1) {
  475. g_free(lock);
  476. msg_info_pool("cannot open file %s, it is locked by another process",
  477. filename);
  478. return NULL;
  479. }
  480. close(lock_fd);
  481. unlink(lock);
  482. g_free(lock);
  483. if (stat(filename, &st) == -1) {
  484. msg_info_pool("cannot stat file %s, error %s, %d", filename, strerror(errno), errno);
  485. return NULL;
  486. }
  487. if (labs((glong) size - st.st_size) > (long) sizeof(struct stat_file) * 2 && size > sizeof(struct stat_file)) {
  488. msg_warn_pool("need to reindex statfile old size: %Hz, new size: %Hz",
  489. (size_t) st.st_size, size);
  490. return rspamd_mmaped_file_reindex(pool, filename, st.st_size, size, stcf);
  491. }
  492. else if (size < sizeof(struct stat_file)) {
  493. msg_err_pool("requested to shrink statfile to %Hz but it is too small",
  494. size);
  495. }
  496. new_file = g_malloc0(sizeof(rspamd_mmaped_file_t));
  497. if ((new_file->fd = open(filename, O_RDWR)) == -1) {
  498. msg_info_pool("cannot open file %s, error %d, %s",
  499. filename,
  500. errno,
  501. strerror(errno));
  502. g_free(new_file);
  503. return NULL;
  504. }
  505. if ((new_file->map =
  506. mmap(NULL, st.st_size, PROT_READ | PROT_WRITE, MAP_SHARED,
  507. new_file->fd, 0)) == MAP_FAILED) {
  508. close(new_file->fd);
  509. msg_info_pool("cannot mmap file %s, error %d, %s",
  510. filename,
  511. errno,
  512. strerror(errno));
  513. g_free(new_file);
  514. return NULL;
  515. }
  516. rspamd_strlcpy(new_file->filename, filename, sizeof(new_file->filename));
  517. new_file->len = st.st_size;
  518. /* Try to lock pages in RAM */
  519. /* Acquire lock for this operation */
  520. if (!rspamd_file_lock(new_file->fd, FALSE)) {
  521. close(new_file->fd);
  522. munmap(new_file->map, st.st_size);
  523. msg_info_pool("cannot lock file %s, error %d, %s",
  524. filename,
  525. errno,
  526. strerror(errno));
  527. g_free(new_file);
  528. return NULL;
  529. }
  530. if (rspamd_mmaped_file_check(pool, new_file) == -1) {
  531. close(new_file->fd);
  532. rspamd_file_unlock(new_file->fd, FALSE);
  533. munmap(new_file->map, st.st_size);
  534. g_free(new_file);
  535. return NULL;
  536. }
  537. rspamd_file_unlock(new_file->fd, FALSE);
  538. new_file->cf = stcf;
  539. new_file->pool = pool;
  540. rspamd_mmaped_file_preload(new_file);
  541. g_assert(stcf->clcf != NULL);
  542. msg_debug_pool("opened statfile %s of size %l", filename, (long) size);
  543. return new_file;
  544. }
  545. gint rspamd_mmaped_file_close_file(rspamd_mempool_t *pool,
  546. rspamd_mmaped_file_t *file)
  547. {
  548. if (file->map) {
  549. msg_info_pool("syncing statfile %s", file->filename);
  550. msync(file->map, file->len, MS_ASYNC);
  551. munmap(file->map, file->len);
  552. }
  553. if (file->fd != -1) {
  554. close(file->fd);
  555. }
  556. g_free(file);
  557. return 0;
  558. }
  559. gint rspamd_mmaped_file_create(const gchar *filename,
  560. size_t size,
  561. struct rspamd_statfile_config *stcf,
  562. rspamd_mempool_t *pool)
  563. {
  564. struct stat_file_header header = {
  565. .magic = {'r', 's', 'd'},
  566. .version = RSPAMD_STATFILE_VERSION,
  567. .padding = {0, 0, 0},
  568. .revision = 0,
  569. .rev_time = 0,
  570. .used_blocks = 0};
  571. struct stat_file_section section = {
  572. .code = STATFILE_SECTION_COMMON,
  573. };
  574. struct stat_file_block block = {0, 0, 0};
  575. struct rspamd_stat_tokenizer *tokenizer;
  576. gint fd, lock_fd;
  577. guint buflen = 0, nblocks;
  578. gchar *buf = NULL, *lock;
  579. struct stat sb;
  580. gpointer tok_conf;
  581. gsize tok_conf_len;
  582. struct timespec sleep_ts = {
  583. .tv_sec = 0,
  584. .tv_nsec = 1000000};
  585. if (size <
  586. sizeof(struct stat_file_header) + sizeof(struct stat_file_section) +
  587. sizeof(block)) {
  588. msg_err_pool("file %s is too small to carry any statistic: %z",
  589. filename,
  590. size);
  591. return -1;
  592. }
  593. lock = g_strconcat(filename, ".lock", NULL);
  594. lock_fd = open(lock, O_WRONLY | O_CREAT | O_EXCL, 00600);
  595. while (lock_fd == -1) {
  596. /* Wait for lock */
  597. lock_fd = open(lock, O_WRONLY | O_CREAT | O_EXCL, 00600);
  598. if (lock_fd != -1) {
  599. if (stat(filename, &sb) != -1) {
  600. /* File has been created by some other process */
  601. unlink(lock);
  602. close(lock_fd);
  603. g_free(lock);
  604. return 0;
  605. }
  606. /* We still need to create it */
  607. goto create;
  608. }
  609. else {
  610. nanosleep(&sleep_ts, NULL);
  611. }
  612. }
  613. create:
  614. msg_debug_pool("create statfile %s of size %l", filename, (long) size);
  615. nblocks =
  616. (size - sizeof(struct stat_file_header) -
  617. sizeof(struct stat_file_section)) /
  618. sizeof(struct stat_file_block);
  619. header.total_blocks = nblocks;
  620. if ((fd =
  621. open(filename, O_RDWR | O_TRUNC | O_CREAT, S_IWUSR | S_IRUSR)) == -1) {
  622. msg_info_pool("cannot create file %s, error %d, %s",
  623. filename,
  624. errno,
  625. strerror(errno));
  626. unlink(lock);
  627. close(lock_fd);
  628. g_free(lock);
  629. return -1;
  630. }
  631. rspamd_fallocate(fd,
  632. 0,
  633. sizeof(header) + sizeof(section) + sizeof(block) * nblocks);
  634. header.create_time = (uint64_t) time(NULL);
  635. g_assert(stcf->clcf != NULL);
  636. g_assert(stcf->clcf->tokenizer != NULL);
  637. tokenizer = rspamd_stat_get_tokenizer(stcf->clcf->tokenizer->name);
  638. g_assert(tokenizer != NULL);
  639. tok_conf = tokenizer->get_config(pool, stcf->clcf->tokenizer, &tok_conf_len);
  640. header.tokenizer_conf_len = tok_conf_len;
  641. g_assert(tok_conf_len < sizeof(header.unused) - sizeof(uint64_t));
  642. memcpy(header.unused, tok_conf, tok_conf_len);
  643. if (write(fd, &header, sizeof(header)) == -1) {
  644. msg_info_pool("cannot write header to file %s, error %d, %s",
  645. filename,
  646. errno,
  647. strerror(errno));
  648. close(fd);
  649. unlink(lock);
  650. close(lock_fd);
  651. g_free(lock);
  652. return -1;
  653. }
  654. section.length = (uint64_t) nblocks;
  655. if (write(fd, &section, sizeof(section)) == -1) {
  656. msg_info_pool("cannot write section header to file %s, error %d, %s",
  657. filename,
  658. errno,
  659. strerror(errno));
  660. close(fd);
  661. unlink(lock);
  662. close(lock_fd);
  663. g_free(lock);
  664. return -1;
  665. }
  666. /* Buffer for write 256 blocks at once */
  667. if (nblocks > 256) {
  668. buflen = sizeof(block) * 256;
  669. buf = g_malloc0(buflen);
  670. }
  671. while (nblocks) {
  672. if (nblocks > 256) {
  673. /* Just write buffer */
  674. if (write(fd, buf, buflen) == -1) {
  675. msg_info_pool("cannot write blocks buffer to file %s, error %d, %s",
  676. filename,
  677. errno,
  678. strerror(errno));
  679. close(fd);
  680. g_free(buf);
  681. unlink(lock);
  682. close(lock_fd);
  683. g_free(lock);
  684. return -1;
  685. }
  686. nblocks -= 256;
  687. }
  688. else {
  689. if (write(fd, &block, sizeof(block)) == -1) {
  690. msg_info_pool("cannot write block to file %s, error %d, %s",
  691. filename,
  692. errno,
  693. strerror(errno));
  694. close(fd);
  695. if (buf) {
  696. g_free(buf);
  697. }
  698. unlink(lock);
  699. close(lock_fd);
  700. g_free(lock);
  701. return -1;
  702. }
  703. nblocks--;
  704. }
  705. }
  706. close(fd);
  707. if (buf) {
  708. g_free(buf);
  709. }
  710. unlink(lock);
  711. close(lock_fd);
  712. g_free(lock);
  713. msg_debug_pool("created statfile %s of size %l", filename, (long) size);
  714. return 0;
  715. }
  716. gpointer
  717. rspamd_mmaped_file_init(struct rspamd_stat_ctx *ctx,
  718. struct rspamd_config *cfg, struct rspamd_statfile *st)
  719. {
  720. struct rspamd_statfile_config *stf = st->stcf;
  721. rspamd_mmaped_file_t *mf;
  722. const ucl_object_t *filenameo, *sizeo;
  723. const gchar *filename;
  724. gsize size;
  725. filenameo = ucl_object_lookup(stf->opts, "filename");
  726. if (filenameo == NULL || ucl_object_type(filenameo) != UCL_STRING) {
  727. filenameo = ucl_object_lookup(stf->opts, "path");
  728. if (filenameo == NULL || ucl_object_type(filenameo) != UCL_STRING) {
  729. msg_err_config("statfile %s has no filename defined", stf->symbol);
  730. return NULL;
  731. }
  732. }
  733. filename = ucl_object_tostring(filenameo);
  734. sizeo = ucl_object_lookup(stf->opts, "size");
  735. if (sizeo == NULL || ucl_object_type(sizeo) != UCL_INT) {
  736. msg_err_config("statfile %s has no size defined", stf->symbol);
  737. return NULL;
  738. }
  739. size = ucl_object_toint(sizeo);
  740. mf = rspamd_mmaped_file_open(cfg->cfg_pool, filename, size, stf);
  741. if (mf != NULL) {
  742. mf->pool = cfg->cfg_pool;
  743. }
  744. else {
  745. /* Create file here */
  746. filenameo = ucl_object_find_key(stf->opts, "filename");
  747. if (filenameo == NULL || ucl_object_type(filenameo) != UCL_STRING) {
  748. filenameo = ucl_object_find_key(stf->opts, "path");
  749. if (filenameo == NULL || ucl_object_type(filenameo) != UCL_STRING) {
  750. msg_err_config("statfile %s has no filename defined", stf->symbol);
  751. return NULL;
  752. }
  753. }
  754. filename = ucl_object_tostring(filenameo);
  755. sizeo = ucl_object_find_key(stf->opts, "size");
  756. if (sizeo == NULL || ucl_object_type(sizeo) != UCL_INT) {
  757. msg_err_config("statfile %s has no size defined", stf->symbol);
  758. return NULL;
  759. }
  760. size = ucl_object_toint(sizeo);
  761. if (rspamd_mmaped_file_create(filename, size, stf, cfg->cfg_pool) != 0) {
  762. msg_err_config("cannot create new file");
  763. }
  764. mf = rspamd_mmaped_file_open(cfg->cfg_pool, filename, size, stf);
  765. }
  766. return (gpointer) mf;
  767. }
  768. void rspamd_mmaped_file_close(gpointer p)
  769. {
  770. rspamd_mmaped_file_t *mf = p;
  771. if (mf) {
  772. rspamd_mmaped_file_close_file(mf->pool, mf);
  773. }
  774. }
  775. gpointer
  776. rspamd_mmaped_file_runtime(struct rspamd_task *task,
  777. struct rspamd_statfile_config *stcf,
  778. gboolean learn,
  779. gpointer p,
  780. gint _id)
  781. {
  782. rspamd_mmaped_file_t *mf = p;
  783. return (gpointer) mf;
  784. }
  785. gboolean
  786. rspamd_mmaped_file_process_tokens(struct rspamd_task *task, GPtrArray *tokens,
  787. gint id,
  788. gpointer p)
  789. {
  790. rspamd_mmaped_file_t *mf = p;
  791. uint32_t h1, h2;
  792. rspamd_token_t *tok;
  793. guint i;
  794. g_assert(tokens != NULL);
  795. g_assert(p != NULL);
  796. for (i = 0; i < tokens->len; i++) {
  797. tok = g_ptr_array_index(tokens, i);
  798. memcpy(&h1, (guchar *) &tok->data, sizeof(h1));
  799. memcpy(&h2, ((guchar *) &tok->data) + sizeof(h1), sizeof(h2));
  800. tok->values[id] = rspamd_mmaped_file_get_block(mf, h1, h2);
  801. }
  802. if (mf->cf->is_spam) {
  803. task->flags |= RSPAMD_TASK_FLAG_HAS_SPAM_TOKENS;
  804. }
  805. else {
  806. task->flags |= RSPAMD_TASK_FLAG_HAS_HAM_TOKENS;
  807. }
  808. return TRUE;
  809. }
  810. gboolean
  811. rspamd_mmaped_file_learn_tokens(struct rspamd_task *task, GPtrArray *tokens,
  812. gint id,
  813. gpointer p)
  814. {
  815. rspamd_mmaped_file_t *mf = p;
  816. uint32_t h1, h2;
  817. rspamd_token_t *tok;
  818. guint i;
  819. g_assert(tokens != NULL);
  820. g_assert(p != NULL);
  821. for (i = 0; i < tokens->len; i++) {
  822. tok = g_ptr_array_index(tokens, i);
  823. memcpy(&h1, (guchar *) &tok->data, sizeof(h1));
  824. memcpy(&h2, ((guchar *) &tok->data) + sizeof(h1), sizeof(h2));
  825. rspamd_mmaped_file_set_block(task->task_pool, mf, h1, h2,
  826. tok->values[id]);
  827. }
  828. return TRUE;
  829. }
  830. gulong
  831. rspamd_mmaped_file_total_learns(struct rspamd_task *task, gpointer runtime,
  832. gpointer ctx)
  833. {
  834. rspamd_mmaped_file_t *mf = (rspamd_mmaped_file_t *) runtime;
  835. uint64_t rev = 0;
  836. time_t t;
  837. if (mf != NULL) {
  838. rspamd_mmaped_file_get_revision(mf, &rev, &t);
  839. }
  840. return rev;
  841. }
  842. gulong
  843. rspamd_mmaped_file_inc_learns(struct rspamd_task *task, gpointer runtime,
  844. gpointer ctx)
  845. {
  846. rspamd_mmaped_file_t *mf = (rspamd_mmaped_file_t *) runtime;
  847. uint64_t rev = 0;
  848. time_t t;
  849. if (mf != NULL) {
  850. rspamd_mmaped_file_inc_revision(mf);
  851. rspamd_mmaped_file_get_revision(mf, &rev, &t);
  852. }
  853. return rev;
  854. }
  855. gulong
  856. rspamd_mmaped_file_dec_learns(struct rspamd_task *task, gpointer runtime,
  857. gpointer ctx)
  858. {
  859. rspamd_mmaped_file_t *mf = (rspamd_mmaped_file_t *) runtime;
  860. uint64_t rev = 0;
  861. time_t t;
  862. if (mf != NULL) {
  863. rspamd_mmaped_file_dec_revision(mf);
  864. rspamd_mmaped_file_get_revision(mf, &rev, &t);
  865. }
  866. return rev;
  867. }
  868. ucl_object_t *
  869. rspamd_mmaped_file_get_stat(gpointer runtime,
  870. gpointer ctx)
  871. {
  872. ucl_object_t *res = NULL;
  873. uint64_t rev;
  874. rspamd_mmaped_file_t *mf = (rspamd_mmaped_file_t *) runtime;
  875. if (mf != NULL) {
  876. res = ucl_object_typed_new(UCL_OBJECT);
  877. rspamd_mmaped_file_get_revision(mf, &rev, NULL);
  878. ucl_object_insert_key(res, ucl_object_fromint(rev), "revision",
  879. 0, false);
  880. ucl_object_insert_key(res, ucl_object_fromint(mf->len), "size",
  881. 0, false);
  882. ucl_object_insert_key(res, ucl_object_fromint(rspamd_mmaped_file_get_total(mf)), "total", 0, false);
  883. ucl_object_insert_key(res, ucl_object_fromint(rspamd_mmaped_file_get_used(mf)), "used", 0, false);
  884. ucl_object_insert_key(res, ucl_object_fromstring(mf->cf->symbol),
  885. "symbol", 0, false);
  886. ucl_object_insert_key(res, ucl_object_fromstring("mmap"),
  887. "type", 0, false);
  888. ucl_object_insert_key(res, ucl_object_fromint(0),
  889. "languages", 0, false);
  890. ucl_object_insert_key(res, ucl_object_fromint(0),
  891. "users", 0, false);
  892. if (mf->cf->label) {
  893. ucl_object_insert_key(res, ucl_object_fromstring(mf->cf->label),
  894. "label", 0, false);
  895. }
  896. }
  897. return res;
  898. }
  899. gboolean
  900. rspamd_mmaped_file_finalize_learn(struct rspamd_task *task, gpointer runtime,
  901. gpointer ctx, GError **err)
  902. {
  903. rspamd_mmaped_file_t *mf = (rspamd_mmaped_file_t *) runtime;
  904. if (mf != NULL) {
  905. msync(mf->map, mf->len, MS_INVALIDATE | MS_ASYNC);
  906. }
  907. return TRUE;
  908. }
  909. gboolean
  910. rspamd_mmaped_file_finalize_process(struct rspamd_task *task, gpointer runtime,
  911. gpointer ctx)
  912. {
  913. return TRUE;
  914. }
  915. gpointer
  916. rspamd_mmaped_file_load_tokenizer_config(gpointer runtime,
  917. gsize *len)
  918. {
  919. rspamd_mmaped_file_t *mf = runtime;
  920. struct stat_file_header *header;
  921. g_assert(mf != NULL);
  922. header = mf->map;
  923. if (len) {
  924. *len = header->tokenizer_conf_len;
  925. }
  926. return header->unused;
  927. }