You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

aio_event.c 10KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508
  1. /*-
  2. * Copyright 2016 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "config.h"
  17. #include <event.h>
  18. #include "aio_event.h"
  19. #include "rspamd.h"
  20. #include "unix-std.h"
  21. #ifdef HAVE_SYS_EVENTFD_H
  22. #include <sys/eventfd.h>
  23. #endif
  24. #ifdef HAVE_AIO_H
  25. #include <aio.h>
  26. #endif
  27. /* Linux syscall numbers */
  28. #if defined(__i386__)
  29. # define SYS_io_setup 245
  30. # define SYS_io_destroy 246
  31. # define SYS_io_getevents 247
  32. # define SYS_io_submit 248
  33. # define SYS_io_cancel 249
  34. #elif defined(__x86_64__)
  35. # define SYS_io_setup 206
  36. # define SYS_io_destroy 207
  37. # define SYS_io_getevents 208
  38. # define SYS_io_submit 209
  39. # define SYS_io_cancel 210
  40. #else
  41. # warning \
  42. "aio is not supported on this platform, please contact author for details"
  43. # define SYS_io_setup 0
  44. # define SYS_io_destroy 0
  45. # define SYS_io_getevents 0
  46. # define SYS_io_submit 0
  47. # define SYS_io_cancel 0
  48. #endif
  49. #define SYS_eventfd 323
  50. #define MAX_AIO_EV 64
  51. struct io_cbdata {
  52. gint fd;
  53. rspamd_aio_cb cb;
  54. guint64 len;
  55. gpointer buf;
  56. gpointer io_buf;
  57. gpointer ud;
  58. };
  59. #ifdef LINUX
  60. /* Linux specific mappings and utilities to avoid using of libaio */
  61. typedef unsigned long aio_context_t;
  62. typedef enum io_iocb_cmd {
  63. IO_CMD_PREAD = 0,
  64. IO_CMD_PWRITE = 1,
  65. IO_CMD_FSYNC = 2,
  66. IO_CMD_FDSYNC = 3,
  67. IO_CMD_POLL = 5,
  68. IO_CMD_NOOP = 6,
  69. } io_iocb_cmd_t;
  70. #if defined(__LITTLE_ENDIAN)
  71. #define PADDED(x,y) x, y
  72. #elif defined(__BIG_ENDIAN)
  73. #define PADDED(x,y) y, x
  74. #else
  75. #error edit for your odd byteorder.
  76. #endif
  77. /*
  78. * we always use a 64bit off_t when communicating
  79. * with userland. its up to libraries to do the
  80. * proper padding and aio_error abstraction
  81. */
  82. struct iocb {
  83. /* these are internal to the kernel/libc. */
  84. guint64 aio_data; /* data to be returned in event's data */
  85. guint32 PADDED (aio_key, aio_reserved1);
  86. /* the kernel sets aio_key to the req # */
  87. /* common fields */
  88. guint16 aio_lio_opcode; /* see IOCB_CMD_ above */
  89. gint16 aio_reqprio;
  90. guint32 aio_fildes;
  91. guint64 aio_buf;
  92. guint64 aio_nbytes;
  93. gint64 aio_offset;
  94. /* extra parameters */
  95. guint64 aio_reserved2; /* TODO: use this for a (struct sigevent *) */
  96. /* flags for the "struct iocb" */
  97. guint32 aio_flags;
  98. /*
  99. * if the IOCB_FLAG_RESFD flag of "aio_flags" is set, this is an
  100. * eventfd to signal AIO readiness to
  101. */
  102. guint32 aio_resfd;
  103. };
  104. struct io_event {
  105. guint64 data; /* the data field from the iocb */
  106. guint64 obj; /* what iocb this event came from */
  107. gint64 res; /* result code for this event */
  108. gint64 res2; /* secondary result */
  109. };
  110. /* Linux specific io calls */
  111. static int
  112. io_setup (guint nr_reqs, aio_context_t *ctx)
  113. {
  114. return syscall (SYS_io_setup, nr_reqs, ctx);
  115. }
  116. static int
  117. io_destroy (aio_context_t ctx)
  118. {
  119. return syscall (SYS_io_destroy, ctx);
  120. }
  121. static int
  122. io_getevents (aio_context_t ctx,
  123. long min_nr,
  124. long nr,
  125. struct io_event *events,
  126. struct timespec *tmo)
  127. {
  128. return syscall (SYS_io_getevents, ctx, min_nr, nr, events, tmo);
  129. }
  130. static int
  131. io_submit (aio_context_t ctx, long n, struct iocb **paiocb)
  132. {
  133. return syscall (SYS_io_submit, ctx, n, paiocb);
  134. }
  135. static int
  136. io_cancel (aio_context_t ctx, struct iocb *iocb, struct io_event *result)
  137. {
  138. return syscall (SYS_io_cancel, ctx, iocb, result);
  139. }
  140. # ifndef HAVE_SYS_EVENTFD_H
  141. static int
  142. eventfd (guint initval, guint flags)
  143. {
  144. return syscall (SYS_eventfd, initval);
  145. }
  146. # endif
  147. #endif
  148. /**
  149. * AIO context
  150. */
  151. struct aio_context {
  152. struct event_base *base;
  153. gboolean has_aio; /**< Whether we have aio support on a system */
  154. #ifdef LINUX
  155. /* Eventfd variant */
  156. gint event_fd;
  157. struct event eventfd_ev;
  158. aio_context_t io_ctx;
  159. #elif defined(HAVE_AIO_H)
  160. /* POSIX aio */
  161. struct event rtsigs[128];
  162. #endif
  163. };
  164. #ifdef LINUX
  165. /* Eventfd read callback */
  166. static void
  167. rspamd_eventfdcb (gint fd, gshort what, gpointer ud)
  168. {
  169. struct aio_context *ctx = ud;
  170. guint64 ready;
  171. gint done, i;
  172. struct io_event event[32];
  173. struct timespec ts;
  174. struct io_cbdata *ev_data;
  175. /* Eventfd returns number of events ready got from kernel */
  176. if (read (fd, &ready, 8) != 8) {
  177. if (errno == EAGAIN) {
  178. return;
  179. }
  180. msg_err ("eventfd read returned error: %s", strerror (errno));
  181. }
  182. ts.tv_sec = 0;
  183. ts.tv_nsec = 0;
  184. while (ready) {
  185. /* Get events ready */
  186. done = io_getevents (ctx->io_ctx, 1, 32, event, &ts);
  187. if (done > 0) {
  188. ready -= done;
  189. for (i = 0; i < done; i++) {
  190. ev_data = (struct io_cbdata *) (uintptr_t) event[i].data;
  191. /* Call this callback */
  192. ev_data->cb (ev_data->fd,
  193. event[i].res,
  194. ev_data->len,
  195. ev_data->buf,
  196. ev_data->ud);
  197. if (ev_data->io_buf) {
  198. free (ev_data->io_buf);
  199. }
  200. g_slice_free1 (sizeof (struct io_cbdata), ev_data);
  201. }
  202. }
  203. else if (done == 0) {
  204. /* No more events are ready */
  205. return;
  206. }
  207. else {
  208. msg_err ("io_getevents failed: %s", strerror (errno));
  209. return;
  210. }
  211. }
  212. }
  213. #endif
  214. /**
  215. * Initialize aio with specified event base
  216. */
  217. struct aio_context *
  218. rspamd_aio_init (struct event_base *base)
  219. {
  220. struct aio_context *new;
  221. /* First of all we need to detect which type of aio we can try to use */
  222. new = g_malloc0 (sizeof (struct aio_context));
  223. new->base = base;
  224. #ifdef LINUX
  225. /* On linux we are trying to use io (3) and eventfd for notifying */
  226. new->event_fd = eventfd (0, 0);
  227. if (new->event_fd == -1) {
  228. msg_err ("eventfd failed: %s", strerror (errno));
  229. }
  230. else {
  231. /* Set this socket non-blocking */
  232. if (rspamd_socket_nonblocking (new->event_fd) == -1) {
  233. msg_err ("non blocking for eventfd failed: %s", strerror (errno));
  234. close (new->event_fd);
  235. }
  236. else {
  237. event_set (&new->eventfd_ev,
  238. new->event_fd,
  239. EV_READ | EV_PERSIST,
  240. rspamd_eventfdcb,
  241. new);
  242. event_base_set (new->base, &new->eventfd_ev);
  243. event_add (&new->eventfd_ev, NULL);
  244. if (io_setup (MAX_AIO_EV, &new->io_ctx) == -1) {
  245. msg_err ("io_setup failed: %s", strerror (errno));
  246. close (new->event_fd);
  247. }
  248. else {
  249. new->has_aio = TRUE;
  250. }
  251. }
  252. }
  253. #elif defined(HAVE_AIO_H)
  254. /* TODO: implement this */
  255. #endif
  256. return new;
  257. }
  258. /**
  259. * Open file for aio
  260. */
  261. gint
  262. rspamd_aio_open (struct aio_context *ctx, const gchar *path, int flags)
  263. {
  264. gint fd = -1;
  265. /* Fallback */
  266. if (!ctx->has_aio) {
  267. return open (path, flags);
  268. }
  269. #ifdef LINUX
  270. fd = open (path, flags | O_DIRECT);
  271. return fd;
  272. #elif defined(HAVE_AIO_H)
  273. fd = open (path, flags);
  274. #endif
  275. return fd;
  276. }
  277. /**
  278. * Asynchronous read of file
  279. */
  280. gint
  281. rspamd_aio_read (gint fd,
  282. gpointer buf,
  283. guint64 len,
  284. guint64 offset,
  285. struct aio_context *ctx,
  286. rspamd_aio_cb cb,
  287. gpointer ud)
  288. {
  289. gint r = -1;
  290. if (ctx->has_aio) {
  291. #ifdef LINUX
  292. struct iocb *iocb[1];
  293. struct io_cbdata *cbdata;
  294. cbdata = g_slice_alloc (sizeof (struct io_cbdata));
  295. cbdata->cb = cb;
  296. cbdata->buf = buf;
  297. cbdata->len = len;
  298. cbdata->ud = ud;
  299. cbdata->fd = fd;
  300. cbdata->io_buf = NULL;
  301. iocb[0] = alloca (sizeof (struct iocb));
  302. memset (iocb[0], 0, sizeof (struct iocb));
  303. iocb[0]->aio_fildes = fd;
  304. iocb[0]->aio_lio_opcode = IO_CMD_PREAD;
  305. iocb[0]->aio_reqprio = 0;
  306. iocb[0]->aio_buf = (guint64)((uintptr_t)buf);
  307. iocb[0]->aio_nbytes = len;
  308. iocb[0]->aio_offset = offset;
  309. iocb[0]->aio_flags |= (1 << 0) /* IOCB_FLAG_RESFD */;
  310. iocb[0]->aio_resfd = ctx->event_fd;
  311. iocb[0]->aio_data = (guint64)((uintptr_t)cbdata);
  312. /* Iocb is copied to kernel internally, so it is safe to put it on stack */
  313. if (io_submit (ctx->io_ctx, 1, iocb) == 1) {
  314. return len;
  315. }
  316. else {
  317. if (errno == EAGAIN || errno == ENOSYS) {
  318. /* Fall back to sync read */
  319. goto blocking;
  320. }
  321. return -1;
  322. }
  323. #elif defined(HAVE_AIO_H)
  324. #endif
  325. }
  326. else {
  327. /* Blocking variant */
  328. goto blocking;
  329. blocking:
  330. #ifdef _LARGEFILE64_SOURCE
  331. r = lseek64 (fd, offset, SEEK_SET);
  332. #else
  333. r = lseek (fd, offset, SEEK_SET);
  334. #endif
  335. if (r > 0) {
  336. r = read (fd, buf, len);
  337. if (r >= 0) {
  338. cb (fd, 0, r, buf, ud);
  339. }
  340. else {
  341. cb (fd, r, -1, buf, ud);
  342. }
  343. }
  344. }
  345. return r;
  346. }
  347. /**
  348. * Asynchronous write of file
  349. */
  350. gint
  351. rspamd_aio_write (gint fd,
  352. gpointer buf,
  353. guint64 len,
  354. guint64 offset,
  355. struct aio_context *ctx,
  356. rspamd_aio_cb cb,
  357. gpointer ud)
  358. {
  359. gint r = -1;
  360. if (ctx->has_aio) {
  361. #ifdef LINUX
  362. struct iocb *iocb[1];
  363. struct io_cbdata *cbdata;
  364. cbdata = g_slice_alloc (sizeof (struct io_cbdata));
  365. cbdata->cb = cb;
  366. cbdata->buf = buf;
  367. cbdata->len = len;
  368. cbdata->ud = ud;
  369. cbdata->fd = fd;
  370. /* We need to align pointer on boundary of 512 bytes here */
  371. if (posix_memalign (&cbdata->io_buf, 512, len) != 0) {
  372. return -1;
  373. }
  374. memcpy (cbdata->io_buf, buf, len);
  375. iocb[0] = alloca (sizeof (struct iocb));
  376. memset (iocb[0], 0, sizeof (struct iocb));
  377. iocb[0]->aio_fildes = fd;
  378. iocb[0]->aio_lio_opcode = IO_CMD_PWRITE;
  379. iocb[0]->aio_reqprio = 0;
  380. iocb[0]->aio_buf = (guint64)((uintptr_t)cbdata->io_buf);
  381. iocb[0]->aio_nbytes = len;
  382. iocb[0]->aio_offset = offset;
  383. iocb[0]->aio_flags |= (1 << 0) /* IOCB_FLAG_RESFD */;
  384. iocb[0]->aio_resfd = ctx->event_fd;
  385. iocb[0]->aio_data = (guint64)((uintptr_t)cbdata);
  386. /* Iocb is copied to kernel internally, so it is safe to put it on stack */
  387. if (io_submit (ctx->io_ctx, 1, iocb) == 1) {
  388. return len;
  389. }
  390. else {
  391. if (errno == EAGAIN || errno == ENOSYS) {
  392. /* Fall back to sync read */
  393. goto blocking;
  394. }
  395. return -1;
  396. }
  397. #elif defined(HAVE_AIO_H)
  398. #endif
  399. }
  400. else {
  401. /* Blocking variant */
  402. goto blocking;
  403. blocking:
  404. #ifdef _LARGEFILE64_SOURCE
  405. r = lseek64 (fd, offset, SEEK_SET);
  406. #else
  407. r = lseek (fd, offset, SEEK_SET);
  408. #endif
  409. if (r > 0) {
  410. r = write (fd, buf, len);
  411. if (r >= 0) {
  412. cb (fd, 0, r, buf, ud);
  413. }
  414. else {
  415. cb (fd, r, -1, buf, ud);
  416. }
  417. }
  418. }
  419. return r;
  420. }
  421. /**
  422. * Close of aio operations
  423. */
  424. gint
  425. rspamd_aio_close (gint fd, struct aio_context *ctx)
  426. {
  427. gint r = -1;
  428. if (ctx->has_aio) {
  429. #ifdef LINUX
  430. struct iocb iocb;
  431. struct io_event ev;
  432. memset (&iocb, 0, sizeof (struct iocb));
  433. iocb.aio_fildes = fd;
  434. iocb.aio_lio_opcode = IO_CMD_NOOP;
  435. /* Iocb is copied to kernel internally, so it is safe to put it on stack */
  436. r = io_cancel (ctx->io_ctx, &iocb, &ev);
  437. close (fd);
  438. return r;
  439. #elif defined(HAVE_AIO_H)
  440. #endif
  441. }
  442. r = close (fd);
  443. return r;
  444. }