Changelog: http://cvs.schmorp.de/libev/Changes?pathrev=rel-4_33
@@ -9,8 +9,12 @@ CHECK_INCLUDE_FILES(sys/stat.h HAVE_SYS_STAT_H) | |||
CHECK_INCLUDE_FILES(sys/signalfd.h HAVE_SYS_SIGNALFD_H) | |||
CHECK_INCLUDE_FILES(port.h HAVE_PORT_H) | |||
CHECK_INCLUDE_FILES(poll.h HAVE_POLL_H) | |||
CHECK_INCLUDE_FILES(memory.h HAVE_MEMORY_H) | |||
CHECK_INCLUDE_FILES(sys/select.h HAVE_SYS_SELECT_H) | |||
CHECK_INCLUDE_FILES(sys/eventfd.h HAVE_SYS_EVENTFD_H) | |||
CHECK_INCLUDE_FILES(sys/timerfd.h HAVE_SYS_TIMERFD_H) | |||
CHECK_INCLUDE_FILES(linux/fs.h HAVE_LINUX_FS_H) | |||
CHECK_INCLUDE_FILES(linux/aio_abi.h HAVE_LINUX_AIO_ABI_H) | |||
IF(HAVE_SYS_INOTIFY_H) | |||
CHECK_SYMBOL_EXISTS(inotify_init "sys/types.h;sys/inotify.h" HAVE_INOTIFY_INIT) | |||
@@ -36,7 +40,9 @@ ENDIF() | |||
IF(HAVE_SYS_SIGNALFD_H) | |||
CHECK_SYMBOL_EXISTS(signalfd sys/signalfd.h HAVE_EVENTFD) | |||
ENDIF() | |||
IF(HAVE_LINUX_FS_H) | |||
CHECK_SYMBOL_EXISTS(RWF_SUPPORTED linux/fs.h HAVE_KERNEL_RWF_T) | |||
ENDIF() | |||
CHECK_SYMBOL_EXISTS(time.h nanosleep HAVE_NANOSLEEP) | |||
# check first without rt |
@@ -24,12 +24,21 @@ | |||
/* Define to 1 if you have the <inttypes.h> header file. */ | |||
#cmakedefine HAVE_INTTYPES_H 1 | |||
/* Define to 1 if linux/fs.h defined kernel_rwf_t */ | |||
#cmakedefine HAVE_KERNEL_RWF_T 1 | |||
/* Define to 1 if you have the `kqueue' function. */ | |||
#cmakedefine HAVE_KQUEUE 1 | |||
/* Define to 1 if you have the `rt' library (-lrt). */ | |||
#cmakedefine HAVE_LIBRT 1 | |||
/* Define to 1 if you have the <linux/aio_abi.h> header file. */ | |||
#cmakedefine HAVE_LINUX_AIO_ABI_H 1 | |||
/* Define to 1 if you have the <linux/fs.h> header file. */ | |||
#cmakedefine HAVE_LINUX_FS_H 1 | |||
/* Define to 1 if you have the <memory.h> header file. */ | |||
#cmakedefine HAVE_MEMORY_H 1 | |||
@@ -87,18 +96,14 @@ | |||
/* Define to 1 if you have the <sys/stat.h> header file. */ | |||
#cmakedefine HAVE_SYS_STAT_H 1 | |||
/* Define to 1 if you have the <sys/timerfd.h> header file. */ | |||
#cmakedefine HAVE_SYS_TIMERFD_H 1 | |||
/* Define to 1 if you have the <sys/types.h> header file. */ | |||
#cmakedefine HAVE_SYS_TYPES_H 1 | |||
/* Define to 1 if you have the <unistd.h> header file. */ | |||
#cmakedefine HAVE_UNISTD_H 1 | |||
/* Define to the version of this package. */ | |||
#define PACKAGE_VERSION 4.25 | |||
/* Define to 1 if you have the ANSI C header files. */ | |||
#define STDC_HEADERS 1 | |||
/* Version number of package */ | |||
#undef VERSION |
@@ -1,7 +1,7 @@ | |||
/* | |||
* libev native API header | |||
* | |||
* Copyright (c) 2007-2018 Marc Alexander Lehmann <libev@schmorp.de> | |||
* Copyright (c) 2007-2020 Marc Alexander Lehmann <libev@schmorp.de> | |||
* All rights reserved. | |||
* | |||
* Redistribution and use in source and binary forms, with or without modifica- | |||
@@ -151,7 +151,10 @@ EV_CPP(extern "C" {) | |||
/*****************************************************************************/ | |||
typedef double ev_tstamp; | |||
#ifndef EV_TSTAMP_T | |||
# define EV_TSTAMP_T double | |||
#endif | |||
typedef EV_TSTAMP_T ev_tstamp; | |||
#include <string.h> /* for memmove */ | |||
@@ -212,7 +215,7 @@ struct ev_loop; | |||
/*****************************************************************************/ | |||
#define EV_VERSION_MAJOR 4 | |||
#define EV_VERSION_MINOR 25 | |||
#define EV_VERSION_MINOR 33 | |||
/* eventmask, revents, events... */ | |||
enum { | |||
@@ -389,14 +392,12 @@ typedef struct ev_stat | |||
} ev_stat; | |||
#endif | |||
#if EV_IDLE_ENABLE | |||
/* invoked when the nothing else needs to be done, keeps the process from blocking */ | |||
/* revent EV_IDLE */ | |||
typedef struct ev_idle | |||
{ | |||
EV_WATCHER (ev_idle) | |||
} ev_idle; | |||
#endif | |||
/* invoked for each run of the mainloop, just before the blocking call */ | |||
/* you can still change events in any way you like */ | |||
@@ -413,23 +414,19 @@ typedef struct ev_check | |||
EV_WATCHER (ev_check) | |||
} ev_check; | |||
#if EV_FORK_ENABLE | |||
/* the callback gets invoked before check in the child process when a fork was detected */ | |||
/* revent EV_FORK */ | |||
typedef struct ev_fork | |||
{ | |||
EV_WATCHER (ev_fork) | |||
} ev_fork; | |||
#endif | |||
#if EV_CLEANUP_ENABLE | |||
/* is invoked just before the loop gets destroyed */ | |||
/* revent EV_CLEANUP */ | |||
typedef struct ev_cleanup | |||
{ | |||
EV_WATCHER (ev_cleanup) | |||
} ev_cleanup; | |||
#endif | |||
#if EV_EMBED_ENABLE | |||
/* used to embed an event loop inside another */ | |||
@@ -439,16 +436,18 @@ typedef struct ev_embed | |||
EV_WATCHER (ev_embed) | |||
struct ev_loop *other; /* ro */ | |||
#undef EV_IO_ENABLE | |||
#define EV_IO_ENABLE 1 | |||
ev_io io; /* private */ | |||
#undef EV_PREPARE_ENABLE | |||
#define EV_PREPARE_ENABLE 1 | |||
ev_prepare prepare; /* private */ | |||
ev_check check; /* unused */ | |||
ev_timer timer; /* unused */ | |||
ev_periodic periodic; /* unused */ | |||
ev_idle idle; /* unused */ | |||
ev_fork fork; /* private */ | |||
#if EV_CLEANUP_ENABLE | |||
ev_cleanup cleanup; /* unused */ | |||
#endif | |||
} ev_embed; | |||
#endif | |||
@@ -501,29 +500,32 @@ union ev_any_watcher | |||
/* flag bits for ev_default_loop and ev_loop_new */ | |||
enum { | |||
/* the default */ | |||
EVFLAG_AUTO = 0x00000000U, /* not quite a mask */ | |||
EVFLAG_AUTO = 0x00000000U, /* not quite a mask */ | |||
/* flag bits */ | |||
EVFLAG_NOENV = 0x01000000U, /* do NOT consult environment */ | |||
EVFLAG_FORKCHECK = 0x02000000U, /* check for a fork in each iteration */ | |||
EVFLAG_NOENV = 0x01000000U, /* do NOT consult environment */ | |||
EVFLAG_FORKCHECK = 0x02000000U, /* check for a fork in each iteration */ | |||
/* debugging/feature disable */ | |||
EVFLAG_NOINOTIFY = 0x00100000U, /* do not attempt to use inotify */ | |||
EVFLAG_NOINOTIFY = 0x00100000U, /* do not attempt to use inotify */ | |||
#if EV_COMPAT3 | |||
EVFLAG_NOSIGFD = 0, /* compatibility to pre-3.9 */ | |||
EVFLAG_NOSIGFD = 0, /* compatibility to pre-3.9 */ | |||
#endif | |||
EVFLAG_SIGNALFD = 0x00200000U, /* attempt to use signalfd */ | |||
EVFLAG_NOSIGMASK = 0x00400000U /* avoid modifying the signal mask */ | |||
EVFLAG_SIGNALFD = 0x00200000U, /* attempt to use signalfd */ | |||
EVFLAG_NOSIGMASK = 0x00400000U, /* avoid modifying the signal mask */ | |||
EVFLAG_NOTIMERFD = 0x00800000U /* avoid creating a timerfd */ | |||
}; | |||
/* method bits to be ored together */ | |||
enum { | |||
EVBACKEND_SELECT = 0x00000001U, /* available just about anywhere */ | |||
EVBACKEND_POLL = 0x00000002U, /* !win, !aix, broken on osx */ | |||
EVBACKEND_EPOLL = 0x00000004U, /* linux */ | |||
EVBACKEND_KQUEUE = 0x00000008U, /* bsd, broken on osx */ | |||
EVBACKEND_DEVPOLL = 0x00000010U, /* solaris 8 */ /* NYI */ | |||
EVBACKEND_PORT = 0x00000020U, /* solaris 10 */ | |||
EVBACKEND_ALL = 0x0000003FU, /* all known backends */ | |||
EVBACKEND_MASK = 0x0000FFFFU /* all future backends */ | |||
EVBACKEND_SELECT = 0x00000001U, /* available just about anywhere */ | |||
EVBACKEND_POLL = 0x00000002U, /* !win, !aix, broken on osx */ | |||
EVBACKEND_EPOLL = 0x00000004U, /* linux */ | |||
EVBACKEND_KQUEUE = 0x00000008U, /* bsd, broken on osx */ | |||
EVBACKEND_DEVPOLL = 0x00000010U, /* solaris 8 */ /* NYI */ | |||
EVBACKEND_PORT = 0x00000020U, /* solaris 10 */ | |||
EVBACKEND_LINUXAIO = 0x00000040U, /* linux AIO, 4.19+ */ | |||
EVBACKEND_IOURING = 0x00000080U, /* linux io_uring, 5.1+ */ | |||
EVBACKEND_ALL = 0x000000FFU, /* all known backends */ | |||
EVBACKEND_MASK = 0x0000FFFFU /* all future backends */ | |||
}; | |||
#if EV_PROTOTYPES | |||
@@ -557,7 +559,6 @@ EV_API_DECL void ev_set_syserr_cb (void (*cb)(const char *msg) EV_NOEXCEPT) EV_N | |||
/* you can call this as often as you like */ | |||
EV_API_DECL struct ev_loop *ev_default_loop (unsigned int flags EV_CPP (= 0)) EV_NOEXCEPT; | |||
/* create and destroy alternative loops that don't handle signals */ | |||
EV_API_DECL struct ev_loop *ev_loop_new (unsigned int flags EV_CPP (= 0)) EV_NOEXCEPT; | |||
@@ -643,6 +644,8 @@ EV_API_DECL int ev_active_cnt (EV_P) EV_NOEXCEPT; | |||
*/ | |||
EV_API_DECL void ev_once (EV_P_ int fd, int events, ev_tstamp timeout, void (*cb)(int revents, void *arg), void *arg) EV_NOEXCEPT; | |||
EV_API_DECL void ev_invoke_pending (EV_P); /* invoke all pending watchers */ | |||
# if EV_FEATURE_API | |||
EV_API_DECL unsigned int ev_iteration (EV_P) EV_NOEXCEPT; /* number of loop iterations */ | |||
EV_API_DECL unsigned int ev_depth (EV_P) EV_NOEXCEPT; /* #ev_loop enters - #ev_loop leaves */ | |||
@@ -660,7 +663,6 @@ EV_API_DECL void ev_set_invoke_pending_cb (EV_P_ ev_loop_callback invoke_pending | |||
EV_API_DECL void ev_set_loop_release_cb (EV_P_ void (*release)(EV_P) EV_NOEXCEPT, void (*acquire)(EV_P) EV_NOEXCEPT) EV_NOEXCEPT; | |||
EV_API_DECL unsigned int ev_pending_count (EV_P) EV_NOEXCEPT; /* number of pending events, if any */ | |||
EV_API_DECL void ev_invoke_pending (EV_P); /* invoke all pending watchers */ | |||
/* | |||
* stop/start the timer handling. | |||
@@ -680,6 +682,7 @@ EV_API_DECL void ev_resume (EV_P) EV_NOEXCEPT; | |||
ev_set_cb ((ev), cb_); \ | |||
} while (0) | |||
#define ev_io_modify(ev,events_) do { (ev)->events = (ev)->events & EV__IOFDSET | (events_); } while (0) | |||
#define ev_io_set(ev,fd_,events_) do { (ev)->fd = (fd_); (ev)->events = (events_) | EV__IOFDSET; } while (0) | |||
#define ev_timer_set(ev,after_,repeat_) do { ((ev_watcher_time *)(ev))->at = (after_); (ev)->repeat = (repeat_); } while (0) | |||
#define ev_periodic_set(ev,ofs_,ival_,rcb_) do { (ev)->offset = (ofs_); (ev)->interval = (ival_); (ev)->reschedule_cb = (rcb_); } while (0) | |||
@@ -712,7 +715,6 @@ EV_API_DECL void ev_resume (EV_P) EV_NOEXCEPT; | |||
#define ev_is_active(ev) (0 + ((ev_watcher *)(void *)(ev))->active) /* ro, true when the watcher has been started */ | |||
#define ev_can_stop(ev) (ev_is_pending(ev) || ev_is_active(ev)) /* ro, true when the watcher has been started */ | |||
#define ev_cb_(ev) (ev)->cb /* rw */ | |||
#define ev_cb(ev) (memmove (&ev_cb_ (ev), &((ev_watcher *)(ev))->cb, sizeof (ev_cb_ (ev))), (ev)->cb) | |||
@@ -727,6 +729,7 @@ EV_API_DECL void ev_resume (EV_P) EV_NOEXCEPT; | |||
#define ev_periodic_at(ev) (+((ev_watcher_time *)(ev))->at) | |||
#ifndef ev_set_cb | |||
/* memmove is used here to avoid strict aliasing violations, and hopefully is optimized out by any reasonable compiler */ | |||
# define ev_set_cb(ev,cb_) (ev_cb_ (ev) = (cb_), memmove (&((ev_watcher *)(ev))->cb, &ev_cb_ (ev), sizeof (ev_cb_ (ev)))) | |||
#endif | |||
@@ -1,7 +1,7 @@ | |||
/* | |||
* libev epoll fd activity backend | |||
* | |||
* Copyright (c) 2007,2008,2009,2010,2011 Marc Alexander Lehmann <libev@schmorp.de> | |||
* Copyright (c) 2007,2008,2009,2010,2011,2016,2017,2019 Marc Alexander Lehmann <libev@schmorp.de> | |||
* All rights reserved. | |||
* | |||
* Redistribution and use in source and binary forms, with or without modifica- | |||
@@ -93,10 +93,10 @@ epoll_modify (EV_P_ int fd, int oev, int nev) | |||
ev.events = (nev & EV_READ ? EPOLLIN : 0) | |||
| (nev & EV_WRITE ? EPOLLOUT : 0); | |||
if (expect_true (!epoll_ctl (backend_fd, oev && oldmask != nev ? EPOLL_CTL_MOD : EPOLL_CTL_ADD, fd, &ev))) | |||
if (ecb_expect_true (!epoll_ctl (backend_fd, oev && oldmask != nev ? EPOLL_CTL_MOD : EPOLL_CTL_ADD, fd, &ev))) | |||
return; | |||
if (expect_true (errno == ENOENT)) | |||
if (ecb_expect_true (errno == ENOENT)) | |||
{ | |||
/* if ENOENT then the fd went away, so try to do the right thing */ | |||
if (!nev) | |||
@@ -105,7 +105,7 @@ epoll_modify (EV_P_ int fd, int oev, int nev) | |||
if (!epoll_ctl (backend_fd, EPOLL_CTL_ADD, fd, &ev)) | |||
return; | |||
} | |||
else if (expect_true (errno == EEXIST)) | |||
else if (ecb_expect_true (errno == EEXIST)) | |||
{ | |||
/* EEXIST means we ignored a previous DEL, but the fd is still active */ | |||
/* if the kernel mask is the same as the new mask, we assume it hasn't changed */ | |||
@@ -115,7 +115,7 @@ epoll_modify (EV_P_ int fd, int oev, int nev) | |||
if (!epoll_ctl (backend_fd, EPOLL_CTL_MOD, fd, &ev)) | |||
return; | |||
} | |||
else if (expect_true (errno == EPERM)) | |||
else if (ecb_expect_true (errno == EPERM)) | |||
{ | |||
/* EPERM means the fd is always ready, but epoll is too snobbish */ | |||
/* to handle it, unlike select or poll. */ | |||
@@ -124,12 +124,14 @@ epoll_modify (EV_P_ int fd, int oev, int nev) | |||
/* add fd to epoll_eperms, if not already inside */ | |||
if (!(oldmask & EV_EMASK_EPERM)) | |||
{ | |||
array_needsize (int, epoll_eperms, epoll_epermmax, epoll_epermcnt + 1, EMPTY2); | |||
array_needsize (int, epoll_eperms, epoll_epermmax, epoll_epermcnt + 1, array_needsize_noinit); | |||
epoll_eperms [epoll_epermcnt++] = fd; | |||
} | |||
return; | |||
} | |||
else | |||
assert (("libev: I/O watcher with invalid fd found in epoll_ctl", errno != EBADF && errno != ELOOP && errno != EINVAL)); | |||
fd_kill (EV_A_ fd); | |||
@@ -144,16 +146,16 @@ epoll_poll (EV_P_ ev_tstamp timeout) | |||
int i; | |||
int eventcnt; | |||
if (expect_false (epoll_epermcnt)) | |||
timeout = 0.; | |||
if (ecb_expect_false (epoll_epermcnt)) | |||
timeout = EV_TS_CONST (0.); | |||
/* epoll wait times cannot be larger than (LONG_MAX - 999UL) / HZ msecs, which is below */ | |||
/* the default libev max wait time, however. */ | |||
EV_RELEASE_CB; | |||
eventcnt = epoll_wait (backend_fd, epoll_events, epoll_eventmax, timeout * 1e3); | |||
eventcnt = epoll_wait (backend_fd, epoll_events, epoll_eventmax, EV_TS_TO_MSEC (timeout)); | |||
EV_ACQUIRE_CB; | |||
if (expect_false (eventcnt < 0)) | |||
if (ecb_expect_false (eventcnt < 0)) | |||
{ | |||
if (errno != EINTR) | |||
ev_syserr ("(libev) epoll_wait"); | |||
@@ -176,14 +178,14 @@ epoll_poll (EV_P_ ev_tstamp timeout) | |||
* other spurious notifications will be found by epoll_ctl, below | |||
* we assume that fd is always in range, as we never shrink the anfds array | |||
*/ | |||
if (expect_false ((uint32_t)anfds [fd].egen != (uint32_t)(ev->data.u64 >> 32))) | |||
if (ecb_expect_false ((uint32_t)anfds [fd].egen != (uint32_t)(ev->data.u64 >> 32))) | |||
{ | |||
/* recreate kernel state */ | |||
postfork |= 2; | |||
continue; | |||
} | |||
if (expect_false (got & ~want)) | |||
if (ecb_expect_false (got & ~want)) | |||
{ | |||
anfds [fd].emask = want; | |||
@@ -195,6 +197,8 @@ epoll_poll (EV_P_ ev_tstamp timeout) | |||
* above with the gencounter check (== our fd is not the event fd), and | |||
* partially here, when epoll_ctl returns an error (== a child has the fd | |||
* but we closed it). | |||
* note: for events such as POLLHUP, where we can't know whether it refers | |||
* to EV_READ or EV_WRITE, we might issue redundant EPOLL_CTL_MOD calls. | |||
*/ | |||
ev->events = (want & EV_READ ? EPOLLIN : 0) | |||
| (want & EV_WRITE ? EPOLLOUT : 0); | |||
@@ -212,7 +216,7 @@ epoll_poll (EV_P_ ev_tstamp timeout) | |||
} | |||
/* if the receive array was full, increase its size */ | |||
if (expect_false (eventcnt == epoll_eventmax)) | |||
if (ecb_expect_false (eventcnt == epoll_eventmax)) | |||
{ | |||
ev_free (epoll_events); | |||
epoll_eventmax = array_nextsize (sizeof (struct epoll_event), epoll_eventmax, epoll_eventmax + 1); | |||
@@ -235,23 +239,34 @@ epoll_poll (EV_P_ ev_tstamp timeout) | |||
} | |||
} | |||
inline_size | |||
int | |||
epoll_init (EV_P_ int flags) | |||
static int | |||
epoll_epoll_create (void) | |||
{ | |||
int fd; | |||
#if defined EPOLL_CLOEXEC && !defined __ANDROID__ | |||
backend_fd = epoll_create1 (EPOLL_CLOEXEC); | |||
fd = epoll_create1 (EPOLL_CLOEXEC); | |||
if (backend_fd < 0 && (errno == EINVAL || errno == ENOSYS)) | |||
if (fd < 0 && (errno == EINVAL || errno == ENOSYS)) | |||
#endif | |||
backend_fd = epoll_create (256); | |||
{ | |||
fd = epoll_create (256); | |||
if (backend_fd < 0) | |||
return 0; | |||
if (fd >= 0) | |||
fcntl (fd, F_SETFD, FD_CLOEXEC); | |||
} | |||
return fd; | |||
} | |||
fcntl (backend_fd, F_SETFD, FD_CLOEXEC); | |||
inline_size | |||
int | |||
epoll_init (EV_P_ int flags) | |||
{ | |||
if ((backend_fd = epoll_epoll_create ()) < 0) | |||
return 0; | |||
backend_mintime = 1e-3; /* epoll does sometimes return early, this is just to avoid the worst */ | |||
backend_mintime = EV_TS_CONST (1e-3); /* epoll does sometimes return early, this is just to avoid the worst */ | |||
backend_modify = epoll_modify; | |||
backend_poll = epoll_poll; | |||
@@ -269,17 +284,15 @@ epoll_destroy (EV_P) | |||
array_free (epoll_eperm, EMPTY); | |||
} | |||
inline_size | |||
void | |||
ecb_cold | |||
static void | |||
epoll_fork (EV_P) | |||
{ | |||
close (backend_fd); | |||
while ((backend_fd = epoll_create (256)) < 0) | |||
while ((backend_fd = epoll_epoll_create ()) < 0) | |||
ev_syserr ("(libev) epoll_create"); | |||
fcntl (backend_fd, F_SETFD, FD_CLOEXEC); | |||
fd_rearm_all (EV_A); | |||
} | |||
@@ -0,0 +1,694 @@ | |||
/* | |||
* libev linux io_uring fd activity backend | |||
* | |||
* Copyright (c) 2019-2020 Marc Alexander Lehmann <libev@schmorp.de> | |||
* All rights reserved. | |||
* | |||
* Redistribution and use in source and binary forms, with or without modifica- | |||
* tion, are permitted provided that the following conditions are met: | |||
* | |||
* 1. Redistributions of source code must retain the above copyright notice, | |||
* this list of conditions and the following disclaimer. | |||
* | |||
* 2. Redistributions in binary form must reproduce the above copyright | |||
* notice, this list of conditions and the following disclaimer in the | |||
* documentation and/or other materials provided with the distribution. | |||
* | |||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED | |||
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER- | |||
* CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO | |||
* EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE- | |||
* CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; | |||
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, | |||
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH- | |||
* ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED | |||
* OF THE POSSIBILITY OF SUCH DAMAGE. | |||
* | |||
* Alternatively, the contents of this file may be used under the terms of | |||
* the GNU General Public License ("GPL") version 2 or any later version, | |||
* in which case the provisions of the GPL are applicable instead of | |||
* the above. If you wish to allow the use of your version of this file | |||
* only under the terms of the GPL and not to allow others to use your | |||
* version of this file under the BSD license, indicate your decision | |||
* by deleting the provisions above and replace them with the notice | |||
* and other provisions required by the GPL. If you do not delete the | |||
* provisions above, a recipient may use your version of this file under | |||
* either the BSD or the GPL. | |||
*/ | |||
/* | |||
* general notes about linux io_uring: | |||
* | |||
* a) it's the best interface I have seen so far. on linux. | |||
* b) best is not necessarily very good. | |||
* c) it's better than the aio mess, doesn't suffer from the fork problems | |||
* of linux aio or epoll and so on and so on. and you could do event stuff | |||
* without any syscalls. what's not to like? | |||
* d) ok, it's vastly more complex, but that's ok, really. | |||
* e) why two mmaps instead of one? one would be more space-efficient, | |||
* and I can't see what benefit two would have (other than being | |||
* somehow resizable/relocatable, but that's apparently not possible). | |||
* f) hmm, it's practically undebuggable (gdb can't access the memory, and | |||
* the bizarre way structure offsets are communicated makes it hard to | |||
* just print the ring buffer heads, even *iff* the memory were visible | |||
* in gdb. but then, that's also ok, really. | |||
* g) well, you cannot specify a timeout when waiting for events. no, | |||
* seriously, the interface doesn't support a timeout. never seen _that_ | |||
* before. sure, you can use a timerfd, but that's another syscall | |||
* you could have avoided. overall, this bizarre omission smells | |||
* like a µ-optimisation by the io_uring author for his personal | |||
* applications, to the detriment of everybody else who just wants | |||
* an event loop. but, umm, ok, if that's all, it could be worse. | |||
* (from what I gather from the author Jens Axboe, it simply didn't | |||
 *    occur to him, and he made good on it by adding an unlimited number
* of timeouts later :). | |||
* h) initially there was a hardcoded limit of 4096 outstanding events. | |||
* later versions not only bump this to 32k, but also can handle | |||
* an unlimited amount of events, so this only affects the batch size. | |||
 * i) unlike linux aio, you *can* register more than the limit
 *    of fd events. while early versions of io_uring signalled an overflow
* and you ended up getting wet. 5.5+ does not do this anymore. | |||
* j) but, oh my! it had exactly the same bugs as the linux aio backend, | |||
* where some undocumented poll combinations just fail. fortunately, | |||
* after finally reaching the author, he was more than willing to fix | |||
* this probably in 5.6+. | |||
* k) overall, the *API* itself is, I dare to say, not a total trainwreck. | |||
 *    once the bugs are fixed (probably in 5.6+), it will be without
* competition. | |||
*/ | |||
/* TODO: use internal TIMEOUT */ | |||
/* TODO: take advantage of single mmap, NODROP etc. */ | |||
/* TODO: resize cq/sq size independently */ | |||
#include <sys/timerfd.h> | |||
#include <sys/mman.h> | |||
#include <poll.h> | |||
#include <stdint.h> | |||
#define IOURING_INIT_ENTRIES 32 | |||
/*****************************************************************************/ | |||
/* syscall wrapdadoop - this section has the raw api/abi definitions */ | |||
#include <linux/fs.h> | |||
#include <linux/types.h> | |||
/* mostly directly taken from the kernel or documentation */ | |||
/* submission queue entry - must match the kernel ABI (linux/io_uring.h) */
struct io_uring_sqe
{
  __u8 opcode;   /* IORING_OP_* */
  __u8 flags;    /* IOSQE_* */
  __u16 ioprio;
  __s32 fd;      /* file descriptor the operation applies to */
  union {
    __u64 off;
    __u64 addr2;
  };
  __u64 addr;
  __u32 len;
  union {
    /* per-opcode flags; this backend only uses poll_events */
    __kernel_rwf_t rw_flags;
    __u32 fsync_flags;
    __u16 poll_events;       /* POLLIN/POLLOUT... for IORING_OP_POLL_ADD */
    __u32 sync_range_flags;
    __u32 msg_flags;
    __u32 timeout_flags;
    __u32 accept_flags;
    __u32 cancel_flags;
    __u32 open_flags;
    __u32 statx_flags;
  };
  __u64 user_data;           /* opaque cookie, echoed back in the cqe */
  union {
    __u16 buf_index;
    __u64 __pad2[3];         /* pads the sqe to 64 bytes */
  };
};
/* completion queue entry - must match the kernel ABI */
struct io_uring_cqe
{
  __u64 user_data;  /* cookie from the originating sqe */
  __s32 res;        /* result (e.g. revents for poll), or -errno */
  __u32 flags;
};
/* byte offsets into the sq ring mmap, filled in by io_uring_setup */
struct io_sqring_offsets
{
  __u32 head;
  __u32 tail;
  __u32 ring_mask;
  __u32 ring_entries;
  __u32 flags;
  __u32 dropped;
  __u32 array;       /* the sqe index array */
  __u32 resv1;
  __u64 resv2;
};
/* byte offsets into the cq ring mmap, filled in by io_uring_setup */
struct io_cqring_offsets
{
  __u32 head;
  __u32 tail;
  __u32 ring_mask;
  __u32 ring_entries;
  __u32 overflow;    /* count of cqes dropped by the kernel */
  __u32 cqes;        /* offset of the cqe array itself */
  __u64 resv[2];
};
/* setup parameters for io_uring_setup: in/out, the kernel fills in the
 * actual ring sizes, feature bits and the ring mmap offsets */
struct io_uring_params
{
  __u32 sq_entries;
  __u32 cq_entries;
  __u32 flags;
  __u32 sq_thread_cpu;
  __u32 sq_thread_idle;
  __u32 features;    /* IORING_FEAT_* reported by the kernel */
  __u32 resv[4];
  struct io_sqring_offsets sq_off;
  struct io_cqring_offsets cq_off;
};
#define IORING_SETUP_CQSIZE 0x00000008 | |||
#define IORING_OP_POLL_ADD 6 | |||
#define IORING_OP_POLL_REMOVE 7 | |||
#define IORING_OP_TIMEOUT 11 | |||
#define IORING_OP_TIMEOUT_REMOVE 12 | |||
/* relative or absolute, reference clock is CLOCK_MONOTONIC */ | |||
/* the kernel's __kernel_timespec: 64-bit seconds even on 32-bit abis,
 * unlike userspace struct timespec */
struct iouring_kernel_timespec
{
  int64_t tv_sec;
  long long tv_nsec;
};
#define IORING_TIMEOUT_ABS 0x00000001 | |||
#define IORING_ENTER_GETEVENTS 0x01 | |||
#define IORING_OFF_SQ_RING 0x00000000ULL | |||
#define IORING_OFF_CQ_RING 0x08000000ULL | |||
#define IORING_OFF_SQES 0x10000000ULL | |||
#define IORING_FEAT_SINGLE_MMAP 0x00000001 | |||
#define IORING_FEAT_NODROP 0x00000002 | |||
#define IORING_FEAT_SUBMIT_STABLE 0x00000004 | |||
/* raw io_uring_setup(2) syscall - called directly because libc may not
 * provide a wrapper; returns the ring fd or -1/errno */
inline_size
int
evsys_io_uring_setup (unsigned entries, struct io_uring_params *params)
{
  return ev_syscall2 (SYS_io_uring_setup, entries, params);
}
/* raw io_uring_enter(2) syscall - submits to_submit sqes and, with
 * IORING_ENTER_GETEVENTS in flags, waits for min_complete completions */
inline_size
int
evsys_io_uring_enter (int fd, unsigned to_submit, unsigned min_complete, unsigned flags, const sigset_t *sig, size_t sigsz)
{
  return ev_syscall6 (SYS_io_uring_enter, fd, to_submit, min_complete, flags, sig, sigsz);
}
/*****************************************************************************/ | |||
/* actual backed implementation */ | |||
/* we hope that volatile will make the compiler access this variables only once */ | |||
#define EV_SQ_VAR(name) *(volatile unsigned *)((char *)iouring_sq_ring + iouring_sq_ ## name) | |||
#define EV_CQ_VAR(name) *(volatile unsigned *)((char *)iouring_cq_ring + iouring_cq_ ## name) | |||
/* the index array */ | |||
#define EV_SQ_ARRAY ((unsigned *)((char *)iouring_sq_ring + iouring_sq_array)) | |||
/* the submit/completion queue entries */ | |||
#define EV_SQES ((struct io_uring_sqe *) iouring_sqes) | |||
#define EV_CQES ((struct io_uring_cqe *)((char *)iouring_cq_ring + iouring_cq_cqes)) | |||
/* flush all pending sqes to the kernel and, if timeout > 0, also wait for
 * at least one completion. releases the loop lock around the syscall.
 * returns the kernel's result (sqes consumed, or -1 with errno set). */
inline_speed
int
iouring_enter (EV_P_ ev_tstamp timeout)
{
  int res;

  EV_RELEASE_CB;

  res = evsys_io_uring_enter (iouring_fd, iouring_to_submit, 1,
                              timeout > EV_TS_CONST (0.) ? IORING_ENTER_GETEVENTS : 0, 0, 0);

  assert (("libev: io_uring_enter did not consume all sqes", (res < 0 || res == iouring_to_submit)));

  /* everything handed to the kernel (or lost on error) - reset the counter */
  iouring_to_submit = 0;

  EV_ACQUIRE_CB;

  return res;
}
/* TODO: can we move things around so we don't need this forward-reference? */ | |||
static void | |||
iouring_poll (EV_P_ ev_tstamp timeout); | |||
/* return a pointer to the next free sqe in the submission ring, flushing
 * (and, if need be, fully polling) until one becomes available.
 * the caller must fill it in and pass it to iouring_sqe_submit. */
static
struct io_uring_sqe *
iouring_sqe_get (EV_P)
{
  unsigned tail;

  for (;;)
    {
      tail = EV_SQ_VAR (tail);

      /* unsigned wraparound makes tail - head the number of in-flight sqes */
      if (ecb_expect_true (tail + 1 - EV_SQ_VAR (head) <= EV_SQ_VAR (ring_entries)))
        break; /* what's the problem, we have free sqes */

      /* queue full, need to flush and possibly handle some events */

#if EV_FEATURE_CODE
      /* first we ask the kernel nicely, most often this frees up some sqes */
      int res = iouring_enter (EV_A_ EV_TS_CONST (0.));

      ECB_MEMORY_FENCE_ACQUIRE; /* better safe than sorry */

      if (res >= 0)
        continue; /* yes, it worked, try again */
#endif

      /* some problem, possibly EBUSY - do the full poll and let it handle any issues */

      iouring_poll (EV_A_ EV_TS_CONST (0.));
      /* iouring_poll should have done ECB_MEMORY_FENCE_ACQUIRE for us */
    }

  /*assert (("libev: io_uring queue full after flush", tail + 1 - EV_SQ_VAR (head) <= EV_SQ_VAR (ring_entries)));*/

  return EV_SQES + (tail & EV_SQ_VAR (ring_mask));
}
inline_size | |||
struct io_uring_sqe * | |||
iouring_sqe_submit (EV_P_ struct io_uring_sqe *sqe) | |||
{ | |||
unsigned idx = sqe - EV_SQES; | |||
EV_SQ_ARRAY [idx] = idx; | |||
ECB_MEMORY_FENCE_RELEASE; | |||
++EV_SQ_VAR (tail); | |||
/*ECB_MEMORY_FENCE_RELEASE; /* for the time being we assume this is not needed */ | |||
++iouring_to_submit; | |||
} | |||
/*****************************************************************************/ | |||
/* when the timerfd expires we simply note the fact,
 * as the purpose of the timerfd is to wake us up, nothing else.
 * the next iteration should re-set it.
 */
static void
iouring_tfd_cb (EV_P_ struct ev_io *w, int revents)
{
  /* invalidate the cached expiry so iouring_tfd_update re-arms the timerfd */
  iouring_tfd_to = EV_TSTAMP_HUGE;
}
/* called for full and partial cleanup */ | |||
ecb_cold | |||
static int | |||
iouring_internal_destroy (EV_P) | |||
{ | |||
close (iouring_tfd); | |||
close (iouring_fd); | |||
if (iouring_sq_ring != MAP_FAILED) munmap (iouring_sq_ring, iouring_sq_ring_size); | |||
if (iouring_cq_ring != MAP_FAILED) munmap (iouring_cq_ring, iouring_cq_ring_size); | |||
if (iouring_sqes != MAP_FAILED) munmap (iouring_sqes , iouring_sqes_size ); | |||
if (ev_is_active (&iouring_tfd_w)) | |||
{ | |||
ev_ref (EV_A); | |||
ev_io_stop (EV_A_ &iouring_tfd_w); | |||
} | |||
} | |||
ecb_cold | |||
static int | |||
iouring_internal_init (EV_P) | |||
{ | |||
struct io_uring_params params = { 0 }; | |||
iouring_to_submit = 0; | |||
iouring_tfd = -1; | |||
iouring_sq_ring = MAP_FAILED; | |||
iouring_cq_ring = MAP_FAILED; | |||
iouring_sqes = MAP_FAILED; | |||
if (!have_monotonic) /* cannot really happen, but what if11 */ | |||
return -1; | |||
for (;;) | |||
{ | |||
iouring_fd = evsys_io_uring_setup (iouring_entries, ¶ms); | |||
if (iouring_fd >= 0) | |||
break; /* yippie */ | |||
if (errno != EINVAL) | |||
return -1; /* we failed */ | |||
#if TODO | |||
if ((~params.features) & (IORING_FEAT_NODROP | IORING_FEATURE_SINGLE_MMAP | IORING_FEAT_SUBMIT_STABLE)) | |||
return -1; /* we require the above features */ | |||
#endif | |||
/* EINVAL: lots of possible reasons, but maybe | |||
* it is because we hit the unqueryable hardcoded size limit | |||
*/ | |||
/* we hit the limit already, give up */ | |||
if (iouring_max_entries) | |||
return -1; | |||
/* first time we hit EINVAL? assume we hit the limit, so go back and retry */ | |||
iouring_entries >>= 1; | |||
iouring_max_entries = iouring_entries; | |||
} | |||
iouring_sq_ring_size = params.sq_off.array + params.sq_entries * sizeof (unsigned); | |||
iouring_cq_ring_size = params.cq_off.cqes + params.cq_entries * sizeof (struct io_uring_cqe); | |||
iouring_sqes_size = params.sq_entries * sizeof (struct io_uring_sqe); | |||
iouring_sq_ring = mmap (0, iouring_sq_ring_size, PROT_READ | PROT_WRITE, | |||
MAP_SHARED | MAP_POPULATE, iouring_fd, IORING_OFF_SQ_RING); | |||
iouring_cq_ring = mmap (0, iouring_cq_ring_size, PROT_READ | PROT_WRITE, | |||
MAP_SHARED | MAP_POPULATE, iouring_fd, IORING_OFF_CQ_RING); | |||
iouring_sqes = mmap (0, iouring_sqes_size, PROT_READ | PROT_WRITE, | |||
MAP_SHARED | MAP_POPULATE, iouring_fd, IORING_OFF_SQES); | |||
if (iouring_sq_ring == MAP_FAILED || iouring_cq_ring == MAP_FAILED || iouring_sqes == MAP_FAILED) | |||
return -1; | |||
iouring_sq_head = params.sq_off.head; | |||
iouring_sq_tail = params.sq_off.tail; | |||
iouring_sq_ring_mask = params.sq_off.ring_mask; | |||
iouring_sq_ring_entries = params.sq_off.ring_entries; | |||
iouring_sq_flags = params.sq_off.flags; | |||
iouring_sq_dropped = params.sq_off.dropped; | |||
iouring_sq_array = params.sq_off.array; | |||
iouring_cq_head = params.cq_off.head; | |||
iouring_cq_tail = params.cq_off.tail; | |||
iouring_cq_ring_mask = params.cq_off.ring_mask; | |||
iouring_cq_ring_entries = params.cq_off.ring_entries; | |||
iouring_cq_overflow = params.cq_off.overflow; | |||
iouring_cq_cqes = params.cq_off.cqes; | |||
iouring_tfd = timerfd_create (CLOCK_MONOTONIC, TFD_CLOEXEC); | |||
if (iouring_tfd < 0) | |||
return iouring_tfd; | |||
iouring_tfd_to = EV_TSTAMP_HUGE; | |||
return 0; | |||
} | |||
/* after a fork the ring fds and mmaps are shared with (and unusable in)
 * the child: tear everything down, build a fresh ring, re-register all
 * watched fds and re-point the timerfd watcher at the new timerfd. */
ecb_cold
static void
iouring_fork (EV_P)
{
  iouring_internal_destroy (EV_A);

  while (iouring_internal_init (EV_A) < 0)
    ev_syserr ("(libev) io_uring_setup");

  /* ask the core to re-issue modify requests for every active fd */
  fd_rearm_all (EV_A);

  ev_io_stop (EV_A_ &iouring_tfd_w);
  ev_io_set (EV_A_ &iouring_tfd_w, iouring_tfd, EV_READ);
  ev_io_start (EV_A_ &iouring_tfd_w);
}
/*****************************************************************************/ | |||
/* change the kernel poll registration for fd from event mask oev to nev.
 * poll requests are identified by (fd, generation) packed into user_data,
 * so a change is expressed as "remove the old request, add a new one". */
static void
iouring_modify (EV_P_ int fd, int oev, int nev)
{
  if (oev)
    {
      /* we assume the sqe's are all "properly" initialised */
      struct io_uring_sqe *sqe = iouring_sqe_get (EV_A);
      sqe->opcode = IORING_OP_POLL_REMOVE;
      sqe->fd = fd;
      /* Jens Axboe notified me that user_data is not what is documented, but is
       * some kind of unique ID that has to match, otherwise the request cannot
       * be removed. Since we don't *really* have that, we pass in the old
       * generation counter - if that fails, too bad, it will hopefully be removed
       * at close time and then be ignored. */
      sqe->addr = (uint32_t)fd | ((__u64)(uint32_t)anfds [fd].egen << 32);
      sqe->user_data = (uint64_t)-1;
      iouring_sqe_submit (EV_A_ sqe);

      /* increment generation counter to avoid handling old events */
      ++anfds [fd].egen;
    }

  if (nev)
    {
      struct io_uring_sqe *sqe = iouring_sqe_get (EV_A);
      sqe->opcode = IORING_OP_POLL_ADD;
      sqe->fd = fd;
      sqe->addr = 0;
      /* pack fd and generation so stale completions can be recognised */
      sqe->user_data = (uint32_t)fd | ((__u64)(uint32_t)anfds [fd].egen << 32);
      sqe->poll_events =
        (nev & EV_READ ? POLLIN : 0)
        | (nev & EV_WRITE ? POLLOUT : 0);
      iouring_sqe_submit (EV_A_ sqe);
    }
}
/* ensure the timerfd fires no later than mn_now + timeout, so the blocking
 * wait inside the kernel is interrupted in time. */
inline_size
void
iouring_tfd_update (EV_P_ ev_tstamp timeout)
{
  ev_tstamp tfd_to = mn_now + timeout;

  /* we assume there will be many iterations per timer change, so
   * we only re-set the timerfd when we have to because its expiry
   * is too late.
   */
  if (ecb_expect_false (tfd_to < iouring_tfd_to))
    {
      struct itimerspec its;

      iouring_tfd_to = tfd_to;
      EV_TS_SET (its.it_interval, 0.);
      EV_TS_SET (its.it_value, tfd_to);

      /* TFD_TIMER_ABSTIME because tfd_to is an absolute mn_now-based stamp */
      if (timerfd_settime (iouring_tfd, TFD_TIMER_ABSTIME, &its, 0) < 0)
        assert (("libev: iouring timerfd_settime failed", 0));
    }
}
/* handle a single completion queue entry: decode fd and generation counter
 * from user_data, discard cancel completions and stale-generation events,
 * translate the poll result into libev events and queue the fd for
 * re-arming (io_uring poll is oneshot). */
inline_size
void
iouring_process_cqe (EV_P_ struct io_uring_cqe *cqe)
{
  int fd = cqe->user_data & 0xffffffffU;
  uint32_t gen = cqe->user_data >> 32;
  int res = cqe->res;

  /* user_data -1 is a remove that we are not atm. interested in */
  if (cqe->user_data == (uint64_t)-1)
    return;

  assert (("libev: io_uring fd must be in-bounds", fd >= 0 && fd < anfdmax));

  /* documentation lies, of course. the result value is NOT like
   * normal syscalls, but like linux raw syscalls, i.e. negative
   * error numbers. fortunate, as otherwise there would be no way
   * to get error codes at all. still, why not document this?
   */

  /* ignore event if generation doesn't match */
  /* other than skipping removal events, */
  /* this should actually be very rare */
  if (ecb_expect_false (gen != (uint32_t)anfds [fd].egen))
    return;

  if (ecb_expect_false (res < 0))
    {
      /*TODO: EINVAL handling (was something failed with this fd)*/

      if (res == -EBADF)
        {
          assert (("libev: event loop rejected bad fd", res != -EBADF));
          fd_kill (EV_A_ fd);
        }
      else
        {
          errno = -res;
          ev_syserr ("(libev) IORING_OP_POLL_ADD");
        }

      return;
    }

  /* feed events, we do not expect or handle POLLNVAL */
  fd_event (
    EV_A_
    fd,
    (res & (POLLOUT | POLLERR | POLLHUP) ? EV_WRITE : 0)
    | (res & (POLLIN | POLLERR | POLLHUP) ? EV_READ : 0)
  );

  /* io_uring is oneshot, so we need to re-arm the fd next iteration */
  /* this also means we usually have to do at least one syscall per iteration */
  anfds [fd].events = 0;
  fd_change (EV_A_ fd, EV_ANFD_REIFY);
}
/* called when the event queue overflows */
ecb_cold
static void
iouring_overflow (EV_P)
{
  /* we have two options, resize the queue (by tearing down
   * everything and recreating it, or living with it
   * and polling.
   * we implement this by resizing the queue, and, if that fails,
   * we just recreate the state on every failure, which
   * kind of is a very inefficient poll.
   * one danger is, due to the bias toward lower fds,
   * we will only really get events for those, so
   * maybe we need a poll() fallback, after all.
   */
  /*EV_CQ_VAR (overflow) = 0;*/ /* need to do this if we keep the state and poll manually */

  fd_rearm_all (EV_A);

  /* we double the size until we hit the hard-to-probe maximum */
  if (!iouring_max_entries)
    {
      iouring_entries <<= 1;
      /* iouring_fork tears down and recreates the ring with the new size */
      iouring_fork (EV_A);
    }
  else
    {
      /* we hit the kernel limit, we should fall back to something else.
       * we can either poll() a few times and hope for the best,
       * poll always, or switch to epoll.
       * TODO: is this necessary with newer kernels?
       */

      iouring_internal_destroy (EV_A);

      /* this should make it so that on return, we don't call any uring functions */
      iouring_to_submit = 0;

      for (;;)
        {
          backend = epoll_init (EV_A_ 0);

          if (backend)
            break;

          ev_syserr ("(libev) iouring switch to epoll");
        }
    }
}
/* handle any events in the completion queue, return true if there were any */
static int
iouring_handle_cq (EV_P)
{
  unsigned head, tail, mask;

  /* the kernel publishes new entries via tail; the acquire fence pairs
   * with the kernel's release store */
  head = EV_CQ_VAR (head);
  ECB_MEMORY_FENCE_ACQUIRE;
  tail = EV_CQ_VAR (tail);

  if (head == tail)
    return 0;

  /* it can only overflow if we have events, yes, yes? */
  if (ecb_expect_false (EV_CQ_VAR (overflow)))
    {
      iouring_overflow (EV_A);
      return 1;
    }

  mask = EV_CQ_VAR (ring_mask);

  do
    iouring_process_cqe (EV_A_ &EV_CQES [head++ & mask]);
  while (head != tail);

  /* publish the consumed head so the kernel can reuse those cqes */
  EV_CQ_VAR (head) = head;
  ECB_MEMORY_FENCE_RELEASE;

  return 1;
}
/* backend poll function: drain the completion queue first; if nothing was
 * pending, arm the timerfd for the timeout, then enter the kernel to submit
 * queued sqes and/or wait for completions. */
static void
iouring_poll (EV_P_ ev_tstamp timeout)
{
  /* if we have events, no need for extra syscalls, but we might have to queue events */
  /* we also clear the timeout if there are outstanding fdchanges */
  /* the latter should only happen if both the sq and cq are full, most likely */
  /* because we have a lot of event sources that immediately complete */
  /* TODO: fdchangecnt is always 0 because fd_reify does not have two buffers yet */
  if (iouring_handle_cq (EV_A) || fdchangecnt)
    timeout = EV_TS_CONST (0.);
  else
    /* no events, so maybe wait for some */
    iouring_tfd_update (EV_A_ timeout);

  /* only enter the kernel if we have something to submit, or we need to wait */
  if (timeout || iouring_to_submit)
    {
      int res = iouring_enter (EV_A_ timeout);

      if (ecb_expect_false (res < 0))
        if (errno == EINTR)
          /* ignore */;
        else if (errno == EBUSY)
          /* cq full, cannot submit - should be rare because we flush the cq first, so simply ignore */;
        else
          ev_syserr ("(libev) iouring setup");
      else
        iouring_handle_cq (EV_A);
    }
}
/* backend initialisation: create the ring and start the timerfd watcher
 * used for timeouts. returns 0 on failure, EVBACKEND_IOURING on success. */
inline_size
int
iouring_init (EV_P_ int flags)
{
  iouring_entries     = IOURING_INIT_ENTRIES;
  iouring_max_entries = 0;

  if (iouring_internal_init (EV_A) < 0)
    {
      iouring_internal_destroy (EV_A);
      return 0;
    }

  /* the timerfd delivers timeouts to the blocking io_uring_enter call */
  ev_io_init (&iouring_tfd_w, iouring_tfd_cb, iouring_tfd, EV_READ);
  ev_set_priority (&iouring_tfd_w, EV_MINPRI);
  ev_io_start (EV_A_ &iouring_tfd_w);
  ev_unref (EV_A); /* watcher should not keep loop alive */

  backend_modify = iouring_modify;
  backend_poll = iouring_poll;

  return EVBACKEND_IOURING;
}
/* backend destructor - unmap the rings and close the fds */
inline_size
void
iouring_destroy (EV_P)
{
  iouring_internal_destroy (EV_A);
}
@@ -1,7 +1,7 @@ | |||
/* | |||
* libev kqueue backend | |||
* | |||
* Copyright (c) 2007,2008,2009,2010,2011,2012,2013 Marc Alexander Lehmann <libev@schmorp.de> | |||
* Copyright (c) 2007,2008,2009,2010,2011,2012,2013,2016,2019 Marc Alexander Lehmann <libev@schmorp.de> | |||
* All rights reserved. | |||
* | |||
* Redistribution and use in source and binary forms, with or without modifica- | |||
@@ -48,7 +48,7 @@ void | |||
kqueue_change (EV_P_ int fd, int filter, int flags, int fflags) | |||
{ | |||
++kqueue_changecnt; | |||
array_needsize (struct kevent, kqueue_changes, kqueue_changemax, kqueue_changecnt, EMPTY2); | |||
array_needsize (struct kevent, kqueue_changes, kqueue_changemax, kqueue_changecnt, array_needsize_noinit); | |||
EV_SET (&kqueue_changes [kqueue_changecnt - 1], fd, filter, flags, fflags, 0, 0); | |||
} | |||
@@ -103,10 +103,10 @@ kqueue_poll (EV_P_ ev_tstamp timeout) | |||
EV_ACQUIRE_CB; | |||
kqueue_changecnt = 0; | |||
if (expect_false (res < 0)) | |||
if (ecb_expect_false (res < 0)) | |||
{ | |||
if (errno != EINTR) | |||
ev_syserr ("(libev) kevent"); | |||
ev_syserr ("(libev) kqueue kevent"); | |||
return; | |||
} | |||
@@ -115,7 +115,7 @@ kqueue_poll (EV_P_ ev_tstamp timeout) | |||
{ | |||
int fd = kqueue_events [i].ident; | |||
if (expect_false (kqueue_events [i].flags & EV_ERROR)) | |||
if (ecb_expect_false (kqueue_events [i].flags & EV_ERROR)) | |||
{ | |||
int err = kqueue_events [i].data; | |||
@@ -129,10 +129,16 @@ kqueue_poll (EV_P_ ev_tstamp timeout) | |||
if (fd_valid (fd)) | |||
kqueue_modify (EV_A_ fd, 0, anfds [fd].events); | |||
else | |||
fd_kill (EV_A_ fd); | |||
{ | |||
assert (("libev: kqueue found invalid fd", 0)); | |||
fd_kill (EV_A_ fd); | |||
} | |||
} | |||
else /* on all other errors, we error out on the fd */ | |||
fd_kill (EV_A_ fd); | |||
{ | |||
assert (("libev: kqueue found invalid fd", 0)); | |||
fd_kill (EV_A_ fd); | |||
} | |||
} | |||
} | |||
else | |||
@@ -145,7 +151,7 @@ kqueue_poll (EV_P_ ev_tstamp timeout) | |||
); | |||
} | |||
if (expect_false (res == kqueue_eventmax)) | |||
if (ecb_expect_false (res == kqueue_eventmax)) | |||
{ | |||
ev_free (kqueue_events); | |||
kqueue_eventmax = array_nextsize (sizeof (struct kevent), kqueue_eventmax, kqueue_eventmax + 1); | |||
@@ -164,7 +170,7 @@ kqueue_init (EV_P_ int flags) | |||
fcntl (backend_fd, F_SETFD, FD_CLOEXEC); /* not sure if necessary, hopefully doesn't hurt */ | |||
backend_mintime = 1e-9; /* apparently, they did the right thing in freebsd */ | |||
backend_mintime = EV_TS_CONST (1e-9); /* apparently, they did the right thing in freebsd */ | |||
backend_modify = kqueue_modify; | |||
backend_poll = kqueue_poll; | |||
@@ -0,0 +1,620 @@ | |||
/* | |||
* libev linux aio fd activity backend | |||
* | |||
* Copyright (c) 2019 Marc Alexander Lehmann <libev@schmorp.de> | |||
* All rights reserved. | |||
* | |||
* Redistribution and use in source and binary forms, with or without modifica- | |||
* tion, are permitted provided that the following conditions are met: | |||
* | |||
* 1. Redistributions of source code must retain the above copyright notice, | |||
* this list of conditions and the following disclaimer. | |||
* | |||
* 2. Redistributions in binary form must reproduce the above copyright | |||
* notice, this list of conditions and the following disclaimer in the | |||
* documentation and/or other materials provided with the distribution. | |||
* | |||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED | |||
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER- | |||
* CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO | |||
* EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE- | |||
* CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; | |||
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, | |||
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH- | |||
* ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED | |||
* OF THE POSSIBILITY OF SUCH DAMAGE. | |||
* | |||
* Alternatively, the contents of this file may be used under the terms of | |||
* the GNU General Public License ("GPL") version 2 or any later version, | |||
* in which case the provisions of the GPL are applicable instead of | |||
* the above. If you wish to allow the use of your version of this file | |||
* only under the terms of the GPL and not to allow others to use your | |||
* version of this file under the BSD license, indicate your decision | |||
* by deleting the provisions above and replace them with the notice | |||
* and other provisions required by the GPL. If you do not delete the | |||
* provisions above, a recipient may use your version of this file under | |||
* either the BSD or the GPL. | |||
*/ | |||
/* | |||
* general notes about linux aio: | |||
* | |||
* a) at first, the linux aio IOCB_CMD_POLL functionality introduced in | |||
* 4.18 looks too good to be true: both watchers and events can be | |||
* batched, and events can even be handled in userspace using | |||
* a ring buffer shared with the kernel. watchers can be canceled | |||
* regardless of whether the fd has been closed. no problems with fork. | |||
* ok, the ring buffer is 200% undocumented (there isn't even a | |||
* header file), but otherwise, it's pure bliss! | |||
* b) ok, watchers are one-shot, so you have to re-arm active ones | |||
* on every iteration. so much for syscall-less event handling, | |||
* but at least these re-arms can be batched, no big deal, right? | |||
* c) well, linux as usual: the documentation lies to you: io_submit | |||
* sometimes returns EINVAL because the kernel doesn't feel like | |||
* handling your poll mask - ttys can be polled for POLLOUT, | |||
* POLLOUT|POLLIN, but polling for POLLIN fails. just great, | |||
* so we have to fall back to something else (hello, epoll), | |||
* but at least the fallback can be slow, because these are | |||
* exceptional cases, right? | |||
* d) hmm, you have to tell the kernel the maximum number of watchers | |||
* you want to queue when initialising the aio context. but of | |||
* course the real limit is magically calculated in the kernel, and | |||
 *    is often higher than we asked for. so we just have to destroy
* the aio context and re-create it a bit larger if we hit the limit. | |||
* (starts to remind you of epoll? well, it's a bit more deterministic | |||
* and less gambling, but still ugly as hell). | |||
* e) that's when you find out you can also hit an arbitrary system-wide | |||
* limit. or the kernel simply doesn't want to handle your watchers. | |||
* what the fuck do we do then? you guessed it, in the middle | |||
* of event handling we have to switch to 100% epoll polling. and | |||
* that better is as fast as normal epoll polling, so you practically | |||
* have to use the normal epoll backend with all its quirks. | |||
* f) end result of this train wreck: it inherits all the disadvantages | |||
* from epoll, while adding a number on its own. why even bother to use | |||
* it? because if conditions are right and your fds are supported and you | |||
* don't hit a limit, this backend is actually faster, doesn't gamble with | |||
* your fds, batches watchers and events and doesn't require costly state | |||
* recreates. well, until it does. | |||
* g) all of this makes this backend use almost twice as much code as epoll. | |||
 *    which in turn uses twice as much code as poll. and that's not counting
* the fact that this backend also depends on the epoll backend, making | |||
* it three times as much code as poll, or kqueue. | |||
* h) bleah. why can't linux just do kqueue. sure kqueue is ugly, but by now | |||
* it's clear that whatever linux comes up with is far, far, far worse. | |||
*/ | |||
#include <sys/time.h> /* actually linux/time.h, but we must assume they are compatible */ | |||
#include <poll.h> | |||
#include <linux/aio_abi.h> | |||
/*****************************************************************************/ | |||
/* syscall wrapdadoop - this section has the raw api/abi definitions */ | |||
#include <sys/syscall.h> /* no glibc wrappers */ | |||
/* aio_abi.h is not versioned in any way, so we cannot test for its existence */
#define IOCB_CMD_POLL 5

/* taken from linux/fs/aio.c. yup, that's a .c file.
 * not only is this totally undocumented, not even the source code
 * can tell you what the future semantics of compat_features and
 * incompat_features are, or what header_length actually is for.
 */
#define AIO_RING_MAGIC                  0xa10a10a1
#define EV_AIO_RING_INCOMPAT_FEATURES   0

/* layout of the ring buffer the kernel shares with userspace - a small
 * header followed by the io_event array the kernel appends to at tail */
struct aio_ring
{
  unsigned id;    /* kernel internal index number */
  unsigned nr;    /* number of io_events */
  unsigned head;  /* Written to by userland or by kernel. */
  unsigned tail;

  unsigned magic;
  unsigned compat_features;
  unsigned incompat_features;
  unsigned header_length; /* size of aio_ring */

  /* C99 flexible array member - same layout as the GNU zero-length
   * array extension ([0]) used previously, but standard C */
  struct io_event io_events[];
};
/* raw io_setup(2): create an aio context able to hold nr_events requests */
inline_size
int
evsys_io_setup (unsigned nr_events, aio_context_t *ctx_idp)
{
  return ev_syscall2 (SYS_io_setup, nr_events, ctx_idp);
}
/* raw io_destroy(2): tear down an aio context and cancel outstanding requests */
inline_size
int
evsys_io_destroy (aio_context_t ctx_id)
{
  return ev_syscall1 (SYS_io_destroy, ctx_id);
}
/* raw io_submit(2): queue nr iocbs; returns the number accepted, or -1 */
inline_size
int
evsys_io_submit (aio_context_t ctx_id, long nr, struct iocb *cbp[])
{
  return ev_syscall3 (SYS_io_submit, ctx_id, nr, cbp);
}
/* raw io_cancel(2): attempt to cancel a previously submitted iocb */
inline_size
int
evsys_io_cancel (aio_context_t ctx_id, struct iocb *cbp, struct io_event *result)
{
  return ev_syscall3 (SYS_io_cancel, ctx_id, cbp, result);
}
/* raw io_getevents(2): wait for between min_nr and nr completions, or timeout */
inline_size
int
evsys_io_getevents (aio_context_t ctx_id, long min_nr, long nr, struct io_event *events, struct timespec *timeout)
{
  return ev_syscall5 (SYS_io_getevents, ctx_id, min_nr, nr, events, timeout);
}
/*****************************************************************************/ | |||
/* actual backend implementation */
ecb_cold | |||
static int | |||
linuxaio_nr_events (EV_P) | |||
{ | |||
/* we start with 16 iocbs and incraese from there | |||
* that's tiny, but the kernel has a rather low system-wide | |||
* limit that can be reached quickly, so let's be parsimonious | |||
* with this resource. | |||
* Rest assured, the kernel generously rounds up small and big numbers | |||
* in different ways (but doesn't seem to charge you for it). | |||
* The 15 here is because the kernel usually has a power of two as aio-max-nr, | |||
* and this helps to take advantage of that limit. | |||
*/ | |||
/* we try to fill 4kB pages exactly. | |||
* the ring buffer header is 32 bytes, every io event is 32 bytes. | |||
* the kernel takes the io requests number, doubles it, adds 2 | |||
* and adds the ring buffer. | |||
* the way we use this is by starting low, and then roughly doubling the | |||
* size each time we hit a limit. | |||
*/ | |||
int requests = 15 << linuxaio_iteration; | |||
int one_page = (4096 | |||
/ sizeof (struct io_event) ) / 2; /* how many fit into one page */ | |||
int first_page = ((4096 - sizeof (struct aio_ring)) | |||
/ sizeof (struct io_event) - 2) / 2; /* how many fit into the first page */ | |||
/* if everything fits into one page, use count exactly */ | |||
if (requests > first_page) | |||
/* otherwise, round down to full pages and add the first page */ | |||
requests = requests / one_page * one_page + first_page; | |||
return requests; | |||
} | |||
/* we use our own wrapper structure in case we ever want to do something "clever" */
typedef struct aniocb
{
  struct iocb io; /* the actual kernel iocb, indexed by fd */
  /*int inuse;*/
} *ANIOCBP;
inline_size | |||
void | |||
linuxaio_array_needsize_iocbp (ANIOCBP *base, int offset, int count) | |||
{ | |||
while (count--) | |||
{ | |||
/* TODO: quite the overhead to allocate every iocb separately, maybe use our own allocator? */ | |||
ANIOCBP iocb = (ANIOCBP)ev_malloc (sizeof (*iocb)); | |||
/* full zero initialise is probably not required at the moment, but | |||
* this is not well documented, so we better do it. | |||
*/ | |||
memset (iocb, 0, sizeof (*iocb)); | |||
iocb->io.aio_lio_opcode = IOCB_CMD_POLL; | |||
iocb->io.aio_fildes = offset; | |||
base [offset++] = iocb; | |||
} | |||
} | |||
/* free every allocated iocb wrapper; resetting linuxaio_iocbpmax to 0 makes
 * the next array_needsize re-initialise the whole array from scratch. */
ecb_cold
static void
linuxaio_free_iocbp (EV_P)
{
  while (linuxaio_iocbpmax--)
    ev_free (linuxaio_iocbps [linuxaio_iocbpmax]);

  linuxaio_iocbpmax = 0; /* next resize will completely reallocate the array, at some overhead */
}
/* update the event set for fd: undo any epoll fallback registration, cancel
 * a still-active iocb, then (if nev is non-empty) queue a fresh oneshot
 * IOCB_CMD_POLL iocb for submission in linuxaio_poll.
 * aio_reqprio < 0 marks an fd handed over to epoll; aio_buf != 0 marks an
 * iocb currently submitted to the kernel. */
static void
linuxaio_modify (EV_P_ int fd, int oev, int nev)
{
  array_needsize (ANIOCBP, linuxaio_iocbps, linuxaio_iocbpmax, fd + 1, linuxaio_array_needsize_iocbp);
  ANIOCBP iocb = linuxaio_iocbps [fd];
  ANFD *anfd = &anfds [fd];

  if (ecb_expect_false (iocb->io.aio_reqprio < 0))
    {
      /* we handed this fd over to epoll, so undo this first */
      /* we do it manually because the optimisations on epoll_modify won't do us any good */
      epoll_ctl (backend_fd, EPOLL_CTL_DEL, fd, 0);
      anfd->emask = 0;
      iocb->io.aio_reqprio = 0;
    }
  else if (ecb_expect_false (iocb->io.aio_buf))
    {
      /* iocb active, so cancel it first before resubmit */
      /* this assumes we only ever get one call per fd per loop iteration */
      for (;;)
        {
          /* on all relevant kernels, io_cancel fails with EINPROGRESS on "success" */
          if (ecb_expect_false (evsys_io_cancel (linuxaio_ctx, &iocb->io, (struct io_event *)0) == 0))
            break;

          if (ecb_expect_true (errno == EINPROGRESS))
            break;

          /* the EINPROGRESS test is for nicer error message. clumsy. */
          if (errno != EINTR)
            {
              assert (("libev: linuxaio unexpected io_cancel failed", errno != EINTR && errno != EINPROGRESS));
              break;
            }
        }

      /* increment generation counter to avoid handling old events */
      ++anfd->egen;
    }

  /* the wanted poll mask is stored in aio_buf; 0 means "inactive" */
  iocb->io.aio_buf = (nev & EV_READ ? POLLIN : 0)
                   | (nev & EV_WRITE ? POLLOUT : 0);

  if (nev)
    {
      /* fd in the low 32 bits, generation counter in the high 32 bits */
      iocb->io.aio_data = (uint32_t)fd | ((__u64)(uint32_t)anfd->egen << 32);

      /* queue iocb up for io_submit */
      /* this assumes we only ever get one call per fd per loop iteration */
      ++linuxaio_submitcnt;
      array_needsize (struct iocb *, linuxaio_submits, linuxaio_submitmax, linuxaio_submitcnt, array_needsize_noinit);
      linuxaio_submits [linuxaio_submitcnt - 1] = &iocb->io;
    }
}
/* callback for the embedded epoll watcher: let the epoll backend process
 * events for the fds that linux aio refused to handle */
static void
linuxaio_epoll_cb (EV_P_ struct ev_io *w, int revents)
{
  epoll_poll (EV_A_ 0);
}
/* mark fd for re-arming on the next loop iteration - aio polls are oneshot,
 * so a fired iocb must be resubmitted before it delivers events again */
inline_speed
void
linuxaio_fd_rearm (EV_P_ int fd)
{
  anfds [fd].events = 0;                /* forget the currently armed event set */
  linuxaio_iocbps [fd]->io.aio_buf = 0; /* iocb is no longer submitted */
  fd_change (EV_A_ fd, EV_ANFD_REIFY);
}
static void | |||
linuxaio_parse_events (EV_P_ struct io_event *ev, int nr) | |||
{ | |||
while (nr) | |||
{ | |||
int fd = ev->data & 0xffffffff; | |||
uint32_t gen = ev->data >> 32; | |||
int res = ev->res; | |||
assert (("libev: iocb fd must be in-bounds", fd >= 0 && fd < anfdmax)); | |||
/* only accept events if generation counter matches */ | |||
if (ecb_expect_true (gen == (uint32_t)anfds [fd].egen)) | |||
{ | |||
/* feed events, we do not expect or handle POLLNVAL */ | |||
fd_event ( | |||
EV_A_ | |||
fd, | |||
(res & (POLLOUT | POLLERR | POLLHUP) ? EV_WRITE : 0) | |||
| (res & (POLLIN | POLLERR | POLLHUP) ? EV_READ : 0) | |||
); | |||
/* linux aio is oneshot: rearm fd. TODO: this does more work than strictly needed */ | |||
linuxaio_fd_rearm (EV_A_ fd); | |||
} | |||
--nr; | |||
++ev; | |||
} | |||
} | |||
/* get any events from ring buffer, return true if any were handled */
/* NOTE(review): the cast relies on the context id apparently being the
 * userspace address of the shared ring - undocumented kernel behaviour */
static int
linuxaio_get_events_from_ring (EV_P)
{
  struct aio_ring *ring = (struct aio_ring *)linuxaio_ctx;
  unsigned head, tail;

  /* the kernel reads and writes both of these variables, */
  /* as a C extension, we assume that volatile use here */
  /* both makes reads atomic and once-only */
  head = *(volatile unsigned *)&ring->head;
  ECB_MEMORY_FENCE_ACQUIRE;
  tail = *(volatile unsigned *)&ring->tail;

  if (head == tail)
    return 0;

  /* parse all available events, but only once, to avoid starvation */
  if (ecb_expect_true (tail > head)) /* normal case around */
    linuxaio_parse_events (EV_A_ ring->io_events + head, tail - head);
  else /* wrapped around */
    {
      linuxaio_parse_events (EV_A_ ring->io_events + head, ring->nr - head);
      linuxaio_parse_events (EV_A_ ring->io_events, tail);
    }

  ECB_MEMORY_FENCE_RELEASE;
  /* as an extension to C, we hope that the volatile will make this atomic and once-only */
  *(volatile unsigned *)&ring->head = tail;

  return 1;
}
/* check whether the shared ring buffer has the layout we expect; if not,
 * we fall back to fetching events via io_getevents only */
inline_size
int
linuxaio_ringbuf_valid (EV_P)
{
  struct aio_ring *ring = (struct aio_ring *)linuxaio_ctx;

  return ecb_expect_true (ring->magic == AIO_RING_MAGIC)
                      && ring->incompat_features == EV_AIO_RING_INCOMPAT_FEATURES
                      && ring->header_length == sizeof (struct aio_ring); /* TODO: or use it to find io_event[0]? */
}
/* read at least one event from kernel, or timeout */
/* prefers the shared ring buffer; only calls io_getevents when the ring is
 * empty (and we have a timeout) or the ring layout is not recognised */
inline_size
void
linuxaio_get_events (EV_P_ ev_tstamp timeout)
{
  struct timespec ts;
  struct io_event ioev[8]; /* 256 octet stack space */
  int want = 1; /* how many events to request */
  int ringbuf_valid = linuxaio_ringbuf_valid (EV_A);

  if (ecb_expect_true (ringbuf_valid))
    {
      /* if the ring buffer has any events, we don't wait or call the kernel at all */
      if (linuxaio_get_events_from_ring (EV_A))
        return;

      /* if the ring buffer is empty, and we don't have a timeout, then don't call the kernel */
      if (!timeout)
        return;
    }
  else
    /* no ringbuffer, request slightly larger batch */
    want = sizeof (ioev) / sizeof (ioev [0]);

  /* no events, so wait for some
   * for fairness reasons, we do this in a loop, to fetch all events
   */
  for (;;)
    {
      int res;

      EV_RELEASE_CB;

      EV_TS_SET (ts, timeout);
      res = evsys_io_getevents (linuxaio_ctx, 1, want, ioev, &ts);

      EV_ACQUIRE_CB;

      if (res < 0)
        if (errno == EINTR)
          /* ignored, retry */;
        else
          ev_syserr ("(libev) linuxaio io_getevents");
      else if (res)
        {
          /* at least one event available, handle them */
          linuxaio_parse_events (EV_A_ ioev, res);

          if (ecb_expect_true (ringbuf_valid))
            {
              /* if we have a ring buffer, handle any remaining events in it */
              linuxaio_get_events_from_ring (EV_A);

              /* at this point, we should have handled all outstanding events */
              break;
            }
          else if (res < want)
            /* otherwise, if there were fewer events than we wanted, we assume there are no more */
            break;
        }
      else
        break; /* no events from the kernel, we are done */

      timeout = EV_TS_CONST (0.); /* only wait in the first iteration */
    }
}
/* (re-)create the aio context, sized for the current resize iteration */
inline_size
int
linuxaio_io_setup (EV_P)
{
  linuxaio_ctx = 0; /* io_setup requires a zeroed context id on input */
  return evsys_io_setup (linuxaio_nr_events (EV_A), &linuxaio_ctx);
}
/* backend poll function: first submit all queued iocbs (handling the
 * various creative ways io_submit can fail), then fetch and dispatch
 * completion events */
static void
linuxaio_poll (EV_P_ ev_tstamp timeout)
{
  int submitted;

  /* first phase: submit new iocbs */

  /* io_submit might return less than the requested number of iocbs */
  /* this is, afaics, only because of errors, but we go by the book and use a loop, */
  /* which allows us to pinpoint the erroneous iocb */
  for (submitted = 0; submitted < linuxaio_submitcnt; )
    {
      int res = evsys_io_submit (linuxaio_ctx, linuxaio_submitcnt - submitted, linuxaio_submits + submitted);

      if (ecb_expect_false (res < 0))
        if (errno == EINVAL)
          {
            /* This happens for unsupported fds, officially, but in my testing,
             * also randomly happens for supported fds. We fall back to good old
             * poll() here, under the assumption that this is a very rare case.
             * See https://lore.kernel.org/patchwork/patch/1047453/ to see
             * discussion about such a case (ttys) where polling for POLLIN
             * fails but POLLIN|POLLOUT works.
             */
            struct iocb *iocb = linuxaio_submits [submitted];
            epoll_modify (EV_A_ iocb->aio_fildes, 0, anfds [iocb->aio_fildes].events);
            iocb->aio_reqprio = -1; /* mark iocb as epoll */

            res = 1; /* skip this iocb - another iocb, another chance */
          }
        else if (errno == EAGAIN)
          {
            /* This happens when the ring buffer is full, or some other shit we
             * don't know and isn't documented. Most likely because we have too
             * many requests and linux aio can't be assed to handle them.
             * In this case, we try to allocate a larger ring buffer, freeing
             * ours first. This might fail, in which case we have to fall back to 100%
             * epoll.
             * God, how I hate linux not getting its act together. Ever.
             */
            evsys_io_destroy (linuxaio_ctx);
            linuxaio_submitcnt = 0;

            /* rearm all fds with active iocbs */
            {
              int fd;
              for (fd = 0; fd < linuxaio_iocbpmax; ++fd)
                if (linuxaio_iocbps [fd]->io.aio_buf)
                  linuxaio_fd_rearm (EV_A_ fd);
            }

            ++linuxaio_iteration;
            if (linuxaio_io_setup (EV_A) < 0)
              {
                /* TODO: rearm all and recreate epoll backend from scratch */
                /* TODO: might be more prudent? */

                /* too bad, we can't get a new aio context, go 100% epoll */
                linuxaio_free_iocbp (EV_A);
                ev_io_stop (EV_A_ &linuxaio_epoll_w);
                ev_ref (EV_A);
                linuxaio_ctx = 0;

                backend        = EVBACKEND_EPOLL;
                backend_modify = epoll_modify;
                backend_poll   = epoll_poll;
              }

            timeout = EV_TS_CONST (0.);
            /* it's easiest to handle this mess in another iteration */
            return;
          }
        else if (errno == EBADF)
          {
            assert (("libev: event loop rejected bad fd", errno != EBADF));
            fd_kill (EV_A_ linuxaio_submits [submitted]->aio_fildes);

            res = 1; /* skip this iocb */
          }
        else if (errno == EINTR) /* not seen in reality, not documented */
          res = 0; /* silently ignore and retry */
        else
          {
            ev_syserr ("(libev) linuxaio io_submit");
            res = 0;
          }

      submitted += res;
    }

  linuxaio_submitcnt = 0;

  /* second phase: fetch and parse events */

  linuxaio_get_events (EV_A_ timeout);
}
/* backend initialisation: requires a kernel with working IOCB_CMD_POLL and
 * a functioning epoll backend to fall back on for unsupported fds.
 * returns 0 on failure, EVBACKEND_LINUXAIO on success. */
inline_size
int
linuxaio_init (EV_P_ int flags)
{
  /* would be great to have a nice test for IOCB_CMD_POLL instead */
  /* also: test some semi-common fd types, such as files and ttys in recommended_backends */
  /* 4.18 introduced IOCB_CMD_POLL, 4.19 made epoll work, and we need that */
  if (ev_linux_version () < 0x041300)
    return 0;

  if (!epoll_init (EV_A_ 0))
    return 0;

  linuxaio_iteration = 0;

  if (linuxaio_io_setup (EV_A) < 0)
    {
      epoll_destroy (EV_A);
      return 0;
    }

  /* watch the epoll fd so fallback events get picked up promptly */
  ev_io_init (&linuxaio_epoll_w, linuxaio_epoll_cb, backend_fd, EV_READ);
  ev_set_priority (&linuxaio_epoll_w, EV_MAXPRI);
  ev_io_start (EV_A_ &linuxaio_epoll_w);
  ev_unref (EV_A); /* watcher should not keep loop alive */

  backend_modify = linuxaio_modify;
  backend_poll = linuxaio_poll;

  linuxaio_iocbpmax = 0;
  linuxaio_iocbps = 0;

  linuxaio_submits = 0;
  linuxaio_submitmax = 0;
  linuxaio_submitcnt = 0;

  return EVBACKEND_LINUXAIO;
}
/* backend destructor: tear down the embedded epoll backend, the iocb
 * wrappers and the aio context */
inline_size
void
linuxaio_destroy (EV_P)
{
  epoll_destroy (EV_A);
  linuxaio_free_iocbp (EV_A);
  evsys_io_destroy (linuxaio_ctx); /* fails in child, aio context is destroyed */
}
/* called in the child after a fork: the aio context does not survive, so
 * discard all queued/allocated iocbs, build a fresh context and re-register
 * the embedded epoll watcher */
ecb_cold
static void
linuxaio_fork (EV_P)
{
  linuxaio_submitcnt = 0; /* all pointers were invalidated */
  linuxaio_free_iocbp (EV_A); /* this frees all iocbs, which is very heavy-handed */
  evsys_io_destroy (linuxaio_ctx); /* fails in child, aio context is destroyed */

  linuxaio_iteration = 0; /* we start over in the child */

  while (linuxaio_io_setup (EV_A) < 0)
    ev_syserr ("(libev) linuxaio io_setup");

  /* forking epoll should also effectively unregister all fds from the backend */
  epoll_fork (EV_A);
  /* epoll_fork already did this. hopefully */
  /*fd_rearm_all (EV_A);*/

  ev_io_stop (EV_A_ &linuxaio_epoll_w);
  ev_io_set (EV_A_ &linuxaio_epoll_w, backend_fd, EV_READ);
  ev_io_start (EV_A_ &linuxaio_epoll_w);
}
@@ -1,7 +1,7 @@ | |||
/* | |||
* libev poll fd activity backend | |||
* | |||
* Copyright (c) 2007,2008,2009,2010,2011 Marc Alexander Lehmann <libev@schmorp.de> | |||
* Copyright (c) 2007,2008,2009,2010,2011,2016,2019 Marc Alexander Lehmann <libev@schmorp.de> | |||
* All rights reserved. | |||
* | |||
* Redistribution and use in source and binary forms, with or without modifica- | |||
@@ -41,10 +41,12 @@ | |||
inline_size | |||
void | |||
pollidx_init (int *base, int count) | |||
array_needsize_pollidx (int *base, int offset, int count) | |||
{ | |||
/* consider using memset (.., -1, ...), which is practically guaranteed | |||
* to work on all systems implementing poll */ | |||
/* using memset (.., -1, ...) is tempting, we we try | |||
* to be ultraportable | |||
*/ | |||
base += offset; | |||
while (count--) | |||
*base++ = -1; | |||
} | |||
@@ -57,14 +59,14 @@ poll_modify (EV_P_ int fd, int oev, int nev) | |||
if (oev == nev) | |||
return; | |||
array_needsize (int, pollidxs, pollidxmax, fd + 1, pollidx_init); | |||
array_needsize (int, pollidxs, pollidxmax, fd + 1, array_needsize_pollidx); | |||
idx = pollidxs [fd]; | |||
if (idx < 0) /* need to allocate a new pollfd */ | |||
{ | |||
pollidxs [fd] = idx = pollcnt++; | |||
array_needsize (struct pollfd, polls, pollmax, pollcnt, EMPTY2); | |||
array_needsize (struct pollfd, polls, pollmax, pollcnt, array_needsize_noinit); | |||
polls [idx].fd = fd; | |||
} | |||
@@ -78,7 +80,7 @@ poll_modify (EV_P_ int fd, int oev, int nev) | |||
{ | |||
pollidxs [fd] = -1; | |||
if (expect_true (idx < --pollcnt)) | |||
if (ecb_expect_true (idx < --pollcnt)) | |||
{ | |||
polls [idx] = polls [pollcnt]; | |||
pollidxs [polls [idx].fd] = idx; | |||
@@ -93,10 +95,10 @@ poll_poll (EV_P_ ev_tstamp timeout) | |||
int res; | |||
EV_RELEASE_CB; | |||
res = poll (polls, pollcnt, timeout * 1e3); | |||
res = poll (polls, pollcnt, EV_TS_TO_MSEC (timeout)); | |||
EV_ACQUIRE_CB; | |||
if (expect_false (res < 0)) | |||
if (ecb_expect_false (res < 0)) | |||
{ | |||
if (errno == EBADF) | |||
fd_ebadf (EV_A); | |||
@@ -108,14 +110,17 @@ poll_poll (EV_P_ ev_tstamp timeout) | |||
else | |||
for (p = polls; res; ++p) | |||
{ | |||
assert (("libev: poll() returned illegal result, broken BSD kernel?", p < polls + pollcnt)); | |||
assert (("libev: poll returned illegal result, broken BSD kernel?", p < polls + pollcnt)); | |||
if (expect_false (p->revents)) /* this expect is debatable */ | |||
if (ecb_expect_false (p->revents)) /* this expect is debatable */ | |||
{ | |||
--res; | |||
if (expect_false (p->revents & POLLNVAL)) | |||
fd_kill (EV_A_ p->fd); | |||
if (ecb_expect_false (p->revents & POLLNVAL)) | |||
{ | |||
assert (("libev: poll found invalid fd in poll set", 0)); | |||
fd_kill (EV_A_ p->fd); | |||
} | |||
else | |||
fd_event ( | |||
EV_A_ | |||
@@ -131,7 +136,7 @@ inline_size | |||
int | |||
poll_init (EV_P_ int flags) | |||
{ | |||
backend_mintime = 1e-3; | |||
backend_mintime = EV_TS_CONST (1e-3); | |||
backend_modify = poll_modify; | |||
backend_poll = poll_poll; | |||
@@ -1,7 +1,7 @@ | |||
/* | |||
* libev solaris event port backend | |||
* | |||
* Copyright (c) 2007,2008,2009,2010,2011 Marc Alexander Lehmann <libev@schmorp.de> | |||
* Copyright (c) 2007,2008,2009,2010,2011,2019 Marc Alexander Lehmann <libev@schmorp.de> | |||
* All rights reserved. | |||
* | |||
* Redistribution and use in source and binary forms, with or without modifica- | |||
@@ -69,7 +69,10 @@ port_associate_and_check (EV_P_ int fd, int ev) | |||
) | |||
{ | |||
if (errno == EBADFD) | |||
fd_kill (EV_A_ fd); | |||
{ | |||
assert (("libev: port_associate found invalid fd", errno != EBADFD)); | |||
fd_kill (EV_A_ fd); | |||
} | |||
else | |||
ev_syserr ("(libev) port_associate"); | |||
} | |||
@@ -129,7 +132,7 @@ port_poll (EV_P_ ev_tstamp timeout) | |||
} | |||
} | |||
if (expect_false (nget == port_eventmax)) | |||
if (ecb_expect_false (nget == port_eventmax)) | |||
{ | |||
ev_free (port_events); | |||
port_eventmax = array_nextsize (sizeof (port_event_t), port_eventmax, port_eventmax + 1); | |||
@@ -151,11 +154,11 @@ port_init (EV_P_ int flags) | |||
/* if my reading of the opensolaris kernel sources are correct, then | |||
* opensolaris does something very stupid: it checks if the time has already | |||
* elapsed and doesn't round up if that is the case,m otherwise it DOES round | |||
* elapsed and doesn't round up if that is the case, otherwise it DOES round | |||
* up. Since we can't know what the case is, we need to guess by using a | |||
* "large enough" timeout. Normally, 1e-9 would be correct. | |||
*/ | |||
backend_mintime = 1e-3; /* needed to compensate for port_getn returning early */ | |||
backend_mintime = EV_TS_CONST (1e-3); /* needed to compensate for port_getn returning early */ | |||
backend_modify = port_modify; | |||
backend_poll = port_poll; | |||
@@ -108,7 +108,7 @@ select_modify (EV_P_ int fd, int oev, int nev) | |||
int word = fd / NFDBITS; | |||
fd_mask mask = 1UL << (fd % NFDBITS); | |||
if (expect_false (vec_max <= word)) | |||
if (ecb_expect_false (vec_max <= word)) | |||
{ | |||
int new_max = word + 1; | |||
@@ -171,7 +171,7 @@ select_poll (EV_P_ ev_tstamp timeout) | |||
#endif | |||
EV_ACQUIRE_CB; | |||
if (expect_false (res < 0)) | |||
if (ecb_expect_false (res < 0)) | |||
{ | |||
#if EV_SELECT_IS_WINSOCKET | |||
errno = WSAGetLastError (); | |||
@@ -197,7 +197,7 @@ select_poll (EV_P_ ev_tstamp timeout) | |||
{ | |||
if (timeout) | |||
{ | |||
unsigned long ms = timeout * 1e3; | |||
unsigned long ms = EV_TS_TO_MSEC (timeout); | |||
Sleep (ms ? ms : 1); | |||
} | |||
@@ -236,7 +236,7 @@ select_poll (EV_P_ ev_tstamp timeout) | |||
if (FD_ISSET (handle, (fd_set *)vec_eo)) events |= EV_WRITE; | |||
#endif | |||
if (expect_true (events)) | |||
if (ecb_expect_true (events)) | |||
fd_event (EV_A_ fd, events); | |||
} | |||
} | |||
@@ -262,7 +262,7 @@ select_poll (EV_P_ ev_tstamp timeout) | |||
events |= word_r & mask ? EV_READ : 0; | |||
events |= word_w & mask ? EV_WRITE : 0; | |||
if (expect_true (events)) | |||
if (ecb_expect_true (events)) | |||
fd_event (EV_A_ word * NFDBITS + bit, events); | |||
} | |||
} | |||
@@ -275,7 +275,7 @@ inline_size | |||
int | |||
select_init (EV_P_ int flags) | |||
{ | |||
backend_mintime = 1e-6; | |||
backend_mintime = EV_TS_CONST (1e-6); | |||
backend_modify = select_modify; | |||
backend_poll = select_poll; | |||
@@ -1,7 +1,7 @@ | |||
/* | |||
* loop member variable declarations | |||
* | |||
* Copyright (c) 2007,2008,2009,2010,2011,2012,2013 Marc Alexander Lehmann <libev@schmorp.de> | |||
* Copyright (c) 2007,2008,2009,2010,2011,2012,2013,2019 Marc Alexander Lehmann <libev@schmorp.de> | |||
* All rights reserved. | |||
* | |||
* Redistribution and use in source and binary forms, with or without modifica- | |||
@@ -107,6 +107,46 @@ VARx(int, epoll_epermcnt) | |||
VARx(int, epoll_epermmax) | |||
#endif | |||
#if EV_USE_LINUXAIO || EV_GENWRAP | |||
VARx(aio_context_t, linuxaio_ctx) | |||
VARx(int, linuxaio_iteration) | |||
VARx(struct aniocb **, linuxaio_iocbps) | |||
VARx(int, linuxaio_iocbpmax) | |||
VARx(struct iocb **, linuxaio_submits) | |||
VARx(int, linuxaio_submitcnt) | |||
VARx(int, linuxaio_submitmax) | |||
VARx(ev_io, linuxaio_epoll_w) | |||
#endif | |||
#if EV_USE_IOURING || EV_GENWRAP | |||
VARx(int, iouring_fd) | |||
VARx(unsigned, iouring_to_submit); | |||
VARx(int, iouring_entries) | |||
VARx(int, iouring_max_entries) | |||
VARx(void *, iouring_sq_ring) | |||
VARx(void *, iouring_cq_ring) | |||
VARx(void *, iouring_sqes) | |||
VARx(uint32_t, iouring_sq_ring_size) | |||
VARx(uint32_t, iouring_cq_ring_size) | |||
VARx(uint32_t, iouring_sqes_size) | |||
VARx(uint32_t, iouring_sq_head) | |||
VARx(uint32_t, iouring_sq_tail) | |||
VARx(uint32_t, iouring_sq_ring_mask) | |||
VARx(uint32_t, iouring_sq_ring_entries) | |||
VARx(uint32_t, iouring_sq_flags) | |||
VARx(uint32_t, iouring_sq_dropped) | |||
VARx(uint32_t, iouring_sq_array) | |||
VARx(uint32_t, iouring_cq_head) | |||
VARx(uint32_t, iouring_cq_tail) | |||
VARx(uint32_t, iouring_cq_ring_mask) | |||
VARx(uint32_t, iouring_cq_ring_entries) | |||
VARx(uint32_t, iouring_cq_overflow) | |||
VARx(uint32_t, iouring_cq_cqes) | |||
VARx(ev_tstamp, iouring_tfd_to) | |||
VARx(int, iouring_tfd) | |||
VARx(ev_io, iouring_tfd_w) | |||
#endif | |||
#if EV_USE_KQUEUE || EV_GENWRAP | |||
VARx(pid_t, kqueue_fd_pid) | |||
VARx(struct kevent *, kqueue_changes) | |||
@@ -187,6 +227,11 @@ VARx(ev_io, sigfd_w) | |||
VARx(sigset_t, sigfd_set) | |||
#endif | |||
#if EV_USE_TIMERFD || EV_GENWRAP | |||
VARx(int, timerfd) /* timerfd for time jump detection */ | |||
VARx(ev_io, timerfd_w) | |||
#endif | |||
VARx(unsigned int, origflags) /* original loop flags */ | |||
#if EV_FEATURE_API || EV_GENWRAP |
@@ -154,8 +154,8 @@ ev_time (void) | |||
ui.u.LowPart = ft.dwLowDateTime; | |||
ui.u.HighPart = ft.dwHighDateTime; | |||
/* msvc cannot convert ulonglong to double... yes, it is that sucky */ | |||
return (LONGLONG)(ui.QuadPart - 116444736000000000) * 1e-7; | |||
/* also, msvc cannot convert ulonglong to double... yes, it is that sucky */ | |||
return EV_TS_FROM_USEC (((LONGLONG)(ui.QuadPart - 116444736000000000) * 1e-1)); | |||
} | |||
#endif |
@@ -44,12 +44,46 @@ | |||
#define invoke_cb ((loop)->invoke_cb) | |||
#define io_blocktime ((loop)->io_blocktime) | |||
#define iocp ((loop)->iocp) | |||
#define iouring_cq_cqes ((loop)->iouring_cq_cqes) | |||
#define iouring_cq_head ((loop)->iouring_cq_head) | |||
#define iouring_cq_overflow ((loop)->iouring_cq_overflow) | |||
#define iouring_cq_ring ((loop)->iouring_cq_ring) | |||
#define iouring_cq_ring_entries ((loop)->iouring_cq_ring_entries) | |||
#define iouring_cq_ring_mask ((loop)->iouring_cq_ring_mask) | |||
#define iouring_cq_ring_size ((loop)->iouring_cq_ring_size) | |||
#define iouring_cq_tail ((loop)->iouring_cq_tail) | |||
#define iouring_entries ((loop)->iouring_entries) | |||
#define iouring_fd ((loop)->iouring_fd) | |||
#define iouring_max_entries ((loop)->iouring_max_entries) | |||
#define iouring_sq_array ((loop)->iouring_sq_array) | |||
#define iouring_sq_dropped ((loop)->iouring_sq_dropped) | |||
#define iouring_sq_flags ((loop)->iouring_sq_flags) | |||
#define iouring_sq_head ((loop)->iouring_sq_head) | |||
#define iouring_sq_ring ((loop)->iouring_sq_ring) | |||
#define iouring_sq_ring_entries ((loop)->iouring_sq_ring_entries) | |||
#define iouring_sq_ring_mask ((loop)->iouring_sq_ring_mask) | |||
#define iouring_sq_ring_size ((loop)->iouring_sq_ring_size) | |||
#define iouring_sq_tail ((loop)->iouring_sq_tail) | |||
#define iouring_sqes ((loop)->iouring_sqes) | |||
#define iouring_sqes_size ((loop)->iouring_sqes_size) | |||
#define iouring_tfd ((loop)->iouring_tfd) | |||
#define iouring_tfd_to ((loop)->iouring_tfd_to) | |||
#define iouring_tfd_w ((loop)->iouring_tfd_w) | |||
#define iouring_to_submit ((loop)->iouring_to_submit) | |||
#define kqueue_changecnt ((loop)->kqueue_changecnt) | |||
#define kqueue_changemax ((loop)->kqueue_changemax) | |||
#define kqueue_changes ((loop)->kqueue_changes) | |||
#define kqueue_eventmax ((loop)->kqueue_eventmax) | |||
#define kqueue_events ((loop)->kqueue_events) | |||
#define kqueue_fd_pid ((loop)->kqueue_fd_pid) | |||
#define linuxaio_ctx ((loop)->linuxaio_ctx) | |||
#define linuxaio_epoll_w ((loop)->linuxaio_epoll_w) | |||
#define linuxaio_iocbpmax ((loop)->linuxaio_iocbpmax) | |||
#define linuxaio_iocbps ((loop)->linuxaio_iocbps) | |||
#define linuxaio_iteration ((loop)->linuxaio_iteration) | |||
#define linuxaio_submitcnt ((loop)->linuxaio_submitcnt) | |||
#define linuxaio_submitmax ((loop)->linuxaio_submitmax) | |||
#define linuxaio_submits ((loop)->linuxaio_submits) | |||
#define loop_count ((loop)->loop_count) | |||
#define loop_depth ((loop)->loop_depth) | |||
#define loop_done ((loop)->loop_done) | |||
@@ -89,6 +123,8 @@ | |||
#define sigfd_w ((loop)->sigfd_w) | |||
#define timeout_blocktime ((loop)->timeout_blocktime) | |||
#define timercnt ((loop)->timercnt) | |||
#define timerfd ((loop)->timerfd) | |||
#define timerfd_w ((loop)->timerfd_w) | |||
#define timermax ((loop)->timermax) | |||
#define timers ((loop)->timers) | |||
#define userdata ((loop)->userdata) | |||
@@ -143,12 +179,46 @@ | |||
#undef invoke_cb | |||
#undef io_blocktime | |||
#undef iocp | |||
#undef iouring_cq_cqes | |||
#undef iouring_cq_head | |||
#undef iouring_cq_overflow | |||
#undef iouring_cq_ring | |||
#undef iouring_cq_ring_entries | |||
#undef iouring_cq_ring_mask | |||
#undef iouring_cq_ring_size | |||
#undef iouring_cq_tail | |||
#undef iouring_entries | |||
#undef iouring_fd | |||
#undef iouring_max_entries | |||
#undef iouring_sq_array | |||
#undef iouring_sq_dropped | |||
#undef iouring_sq_flags | |||
#undef iouring_sq_head | |||
#undef iouring_sq_ring | |||
#undef iouring_sq_ring_entries | |||
#undef iouring_sq_ring_mask | |||
#undef iouring_sq_ring_size | |||
#undef iouring_sq_tail | |||
#undef iouring_sqes | |||
#undef iouring_sqes_size | |||
#undef iouring_tfd | |||
#undef iouring_tfd_to | |||
#undef iouring_tfd_w | |||
#undef iouring_to_submit | |||
#undef kqueue_changecnt | |||
#undef kqueue_changemax | |||
#undef kqueue_changes | |||
#undef kqueue_eventmax | |||
#undef kqueue_events | |||
#undef kqueue_fd_pid | |||
#undef linuxaio_ctx | |||
#undef linuxaio_epoll_w | |||
#undef linuxaio_iocbpmax | |||
#undef linuxaio_iocbps | |||
#undef linuxaio_iteration | |||
#undef linuxaio_submitcnt | |||
#undef linuxaio_submitmax | |||
#undef linuxaio_submits | |||
#undef loop_count | |||
#undef loop_depth | |||
#undef loop_done | |||
@@ -188,6 +258,8 @@ | |||
#undef sigfd_w | |||
#undef timeout_blocktime | |||
#undef timercnt | |||
#undef timerfd | |||
#undef timerfd_w | |||
#undef timermax | |||
#undef timers | |||
#undef userdata |
@@ -2631,6 +2631,14 @@ rspamd_config_ev_backend_to_string (int ev_backend, gboolean *effective) | |||
} | |||
if (ev_backend & EVBACKEND_EPOLL) { | |||
if (ev_backend & EVBACKEND_IOURING) { | |||
SET_EFFECTIVE (TRUE); | |||
return "epoll+io_uring"; | |||
} | |||
if (ev_backend & EVBACKEND_LINUXAIO) { | |||
SET_EFFECTIVE (TRUE); | |||
return "epoll+aio"; | |||
} | |||
SET_EFFECTIVE (TRUE); | |||
return "epoll"; | |||
} |