
[Feature] Update libev to 4.33

Changelog: http://cvs.schmorp.de/libev/Changes?pathrev=rel-4_33
Vsevolod Stakhov · 4 years ago · commit 4610fcee93 · tags/2.5
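For consumers of the bundled libev, a quick way to confirm the update took effect is to compare the compile-time version macros against the version reported by the linked library. A minimal sketch using libev's public version API (not part of this commit):

#include <stdio.h>
#include <ev.h>

int main (void)
{
  /* EV_VERSION_* come from ev.h; ev_version_major/minor report the library actually linked */
  printf ("compiled against libev %d.%d, running %d.%d\n",
          EV_VERSION_MAJOR, EV_VERSION_MINOR,
          ev_version_major (), ev_version_minor ());

  /* after this update both pairs should read 4.33 */
  return ev_version_major () == EV_VERSION_MAJOR ? 0 : 1;
}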

contrib/libev/CMakeLists.txt  +7 -1

@@ -9,8 +9,12 @@ CHECK_INCLUDE_FILES(sys/stat.h HAVE_SYS_STAT_H)
CHECK_INCLUDE_FILES(sys/signalfd.h HAVE_SYS_SIGNALFD_H)
CHECK_INCLUDE_FILES(port.h HAVE_PORT_H)
CHECK_INCLUDE_FILES(poll.h HAVE_POLL_H)
CHECK_INCLUDE_FILES(memory.h HAVE_MEMORY_H)
CHECK_INCLUDE_FILES(sys/select.h HAVE_SYS_SELECT_H)
CHECK_INCLUDE_FILES(sys/eventfd.h HAVE_SYS_EVENTFD_H)
CHECK_INCLUDE_FILES(sys/timerfd.h HAVE_SYS_TIMERFD_H)
CHECK_INCLUDE_FILES(linux/fs.h HAVE_LINUX_FS_H)
CHECK_INCLUDE_FILES(linux/aio_abi.h HAVE_LINUX_AIO_ABI_H)

IF(HAVE_SYS_INOTIFY_H)
CHECK_SYMBOL_EXISTS(inotify_init "sys/types.h;sys/inotify.h" HAVE_INOTIFY_INIT)
@@ -36,7 +40,9 @@ ENDIF()
IF(HAVE_SYS_SIGNALFD_H)
CHECK_SYMBOL_EXISTS(signalfd sys/signalfd.h HAVE_EVENTFD)
ENDIF()

IF(HAVE_LINUX_FS_H)
CHECK_SYMBOL_EXISTS(RWF_SUPPORTED linux/fs.h HAVE_KERNEL_RWF_T)
ENDIF()
CHECK_SYMBOL_EXISTS(time.h nanosleep HAVE_NANOSLEEP)

# check first without rt

contrib/libev/config.h.in  +12 -7

@@ -24,12 +24,21 @@
/* Define to 1 if you have the <inttypes.h> header file. */
#cmakedefine HAVE_INTTYPES_H 1

/* Define to 1 if linux/fs.h defined kernel_rwf_t */
#cmakedefine HAVE_KERNEL_RWF_T 1

/* Define to 1 if you have the `kqueue' function. */
#cmakedefine HAVE_KQUEUE 1

/* Define to 1 if you have the `rt' library (-lrt). */
#cmakedefine HAVE_LIBRT 1

/* Define to 1 if you have the <linux/aio_abi.h> header file. */
#cmakedefine HAVE_LINUX_AIO_ABI_H 1

/* Define to 1 if you have the <linux/fs.h> header file. */
#cmakedefine HAVE_LINUX_FS_H 1

/* Define to 1 if you have the <memory.h> header file. */
#cmakedefine HAVE_MEMORY_H 1

@@ -87,18 +96,14 @@
/* Define to 1 if you have the <sys/stat.h> header file. */
#cmakedefine HAVE_SYS_STAT_H 1

/* Define to 1 if you have the <sys/timerfd.h> header file. */
#cmakedefine HAVE_SYS_TIMERFD_H 1

/* Define to 1 if you have the <sys/types.h> header file. */
#cmakedefine HAVE_SYS_TYPES_H 1

/* Define to 1 if you have the <unistd.h> header file. */
#cmakedefine HAVE_UNISTD_H 1


/* Define to the version of this package. */
#define PACKAGE_VERSION 4.25

/* Define to 1 if you have the ANSI C header files. */
#define STDC_HEADERS 1

/* Version number of package */
#undef VERSION

contrib/libev/ev.c  +727 -260 (file diff suppressed because it is too large)


contrib/libev/ev.h  +32 -29

@@ -1,7 +1,7 @@
/*
* libev native API header
*
* Copyright (c) 2007-2018 Marc Alexander Lehmann <libev@schmorp.de>
* Copyright (c) 2007-2020 Marc Alexander Lehmann <libev@schmorp.de>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modifica-
@@ -151,7 +151,10 @@ EV_CPP(extern "C" {)

/*****************************************************************************/

typedef double ev_tstamp;
#ifndef EV_TSTAMP_T
# define EV_TSTAMP_T double
#endif
typedef EV_TSTAMP_T ev_tstamp;

#include <string.h> /* for memmove */

@@ -212,7 +215,7 @@ struct ev_loop;
/*****************************************************************************/

#define EV_VERSION_MAJOR 4
#define EV_VERSION_MINOR 25
#define EV_VERSION_MINOR 33

/* eventmask, revents, events... */
enum {
@@ -389,14 +392,12 @@ typedef struct ev_stat
} ev_stat;
#endif

#if EV_IDLE_ENABLE
/* invoked when nothing else needs to be done, keeps the process from blocking */
/* revent EV_IDLE */
typedef struct ev_idle
{
EV_WATCHER (ev_idle)
} ev_idle;
#endif

/* invoked for each run of the mainloop, just before the blocking call */
/* you can still change events in any way you like */
@@ -413,23 +414,19 @@ typedef struct ev_check
EV_WATCHER (ev_check)
} ev_check;

#if EV_FORK_ENABLE
/* the callback gets invoked before check in the child process when a fork was detected */
/* revent EV_FORK */
typedef struct ev_fork
{
EV_WATCHER (ev_fork)
} ev_fork;
#endif

#if EV_CLEANUP_ENABLE
/* is invoked just before the loop gets destroyed */
/* revent EV_CLEANUP */
typedef struct ev_cleanup
{
EV_WATCHER (ev_cleanup)
} ev_cleanup;
#endif

#if EV_EMBED_ENABLE
/* used to embed an event loop inside another */
@@ -439,16 +436,18 @@ typedef struct ev_embed
EV_WATCHER (ev_embed)

struct ev_loop *other; /* ro */
#undef EV_IO_ENABLE
#define EV_IO_ENABLE 1
ev_io io; /* private */
#undef EV_PREPARE_ENABLE
#define EV_PREPARE_ENABLE 1
ev_prepare prepare; /* private */
ev_check check; /* unused */
ev_timer timer; /* unused */
ev_periodic periodic; /* unused */
ev_idle idle; /* unused */
ev_fork fork; /* private */
#if EV_CLEANUP_ENABLE
ev_cleanup cleanup; /* unused */
#endif
} ev_embed;
#endif

@@ -501,29 +500,32 @@ union ev_any_watcher
/* flag bits for ev_default_loop and ev_loop_new */
enum {
/* the default */
EVFLAG_AUTO = 0x00000000U, /* not quite a mask */
EVFLAG_AUTO = 0x00000000U, /* not quite a mask */
/* flag bits */
EVFLAG_NOENV = 0x01000000U, /* do NOT consult environment */
EVFLAG_FORKCHECK = 0x02000000U, /* check for a fork in each iteration */
EVFLAG_NOENV = 0x01000000U, /* do NOT consult environment */
EVFLAG_FORKCHECK = 0x02000000U, /* check for a fork in each iteration */
/* debugging/feature disable */
EVFLAG_NOINOTIFY = 0x00100000U, /* do not attempt to use inotify */
EVFLAG_NOINOTIFY = 0x00100000U, /* do not attempt to use inotify */
#if EV_COMPAT3
EVFLAG_NOSIGFD = 0, /* compatibility to pre-3.9 */
EVFLAG_NOSIGFD = 0, /* compatibility to pre-3.9 */
#endif
EVFLAG_SIGNALFD = 0x00200000U, /* attempt to use signalfd */
EVFLAG_NOSIGMASK = 0x00400000U /* avoid modifying the signal mask */
EVFLAG_SIGNALFD = 0x00200000U, /* attempt to use signalfd */
EVFLAG_NOSIGMASK = 0x00400000U, /* avoid modifying the signal mask */
EVFLAG_NOTIMERFD = 0x00800000U /* avoid creating a timerfd */
};

/* method bits to be ored together */
enum {
EVBACKEND_SELECT = 0x00000001U, /* available just about anywhere */
EVBACKEND_POLL = 0x00000002U, /* !win, !aix, broken on osx */
EVBACKEND_EPOLL = 0x00000004U, /* linux */
EVBACKEND_KQUEUE = 0x00000008U, /* bsd, broken on osx */
EVBACKEND_DEVPOLL = 0x00000010U, /* solaris 8 */ /* NYI */
EVBACKEND_PORT = 0x00000020U, /* solaris 10 */
EVBACKEND_ALL = 0x0000003FU, /* all known backends */
EVBACKEND_MASK = 0x0000FFFFU /* all future backends */
EVBACKEND_SELECT = 0x00000001U, /* available just about anywhere */
EVBACKEND_POLL = 0x00000002U, /* !win, !aix, broken on osx */
EVBACKEND_EPOLL = 0x00000004U, /* linux */
EVBACKEND_KQUEUE = 0x00000008U, /* bsd, broken on osx */
EVBACKEND_DEVPOLL = 0x00000010U, /* solaris 8 */ /* NYI */
EVBACKEND_PORT = 0x00000020U, /* solaris 10 */
EVBACKEND_LINUXAIO = 0x00000040U, /* linux AIO, 4.19+ */
EVBACKEND_IOURING = 0x00000080U, /* linux io_uring, 5.1+ */
EVBACKEND_ALL = 0x000000FFU, /* all known backends */
EVBACKEND_MASK = 0x0000FFFFU /* all future backends */
};

#if EV_PROTOTYPES
@@ -557,7 +559,6 @@ EV_API_DECL void ev_set_syserr_cb (void (*cb)(const char *msg) EV_NOEXCEPT) EV_N
/* you can call this as often as you like */
EV_API_DECL struct ev_loop *ev_default_loop (unsigned int flags EV_CPP (= 0)) EV_NOEXCEPT;


/* create and destroy alternative loops that don't handle signals */
EV_API_DECL struct ev_loop *ev_loop_new (unsigned int flags EV_CPP (= 0)) EV_NOEXCEPT;

@@ -643,6 +644,8 @@ EV_API_DECL int ev_active_cnt (EV_P) EV_NOEXCEPT;
*/
EV_API_DECL void ev_once (EV_P_ int fd, int events, ev_tstamp timeout, void (*cb)(int revents, void *arg), void *arg) EV_NOEXCEPT;

EV_API_DECL void ev_invoke_pending (EV_P); /* invoke all pending watchers */

# if EV_FEATURE_API
EV_API_DECL unsigned int ev_iteration (EV_P) EV_NOEXCEPT; /* number of loop iterations */
EV_API_DECL unsigned int ev_depth (EV_P) EV_NOEXCEPT; /* #ev_loop enters - #ev_loop leaves */
@@ -660,7 +663,6 @@ EV_API_DECL void ev_set_invoke_pending_cb (EV_P_ ev_loop_callback invoke_pending
EV_API_DECL void ev_set_loop_release_cb (EV_P_ void (*release)(EV_P) EV_NOEXCEPT, void (*acquire)(EV_P) EV_NOEXCEPT) EV_NOEXCEPT;

EV_API_DECL unsigned int ev_pending_count (EV_P) EV_NOEXCEPT; /* number of pending events, if any */
EV_API_DECL void ev_invoke_pending (EV_P); /* invoke all pending watchers */

/*
* stop/start the timer handling.
@@ -680,6 +682,7 @@ EV_API_DECL void ev_resume (EV_P) EV_NOEXCEPT;
ev_set_cb ((ev), cb_); \
} while (0)

#define ev_io_modify(ev,events_) do { (ev)->events = (ev)->events & EV__IOFDSET | (events_); } while (0)
#define ev_io_set(ev,fd_,events_) do { (ev)->fd = (fd_); (ev)->events = (events_) | EV__IOFDSET; } while (0)
#define ev_timer_set(ev,after_,repeat_) do { ((ev_watcher_time *)(ev))->at = (after_); (ev)->repeat = (repeat_); } while (0)
#define ev_periodic_set(ev,ofs_,ival_,rcb_) do { (ev)->offset = (ofs_); (ev)->interval = (ival_); (ev)->reschedule_cb = (rcb_); } while (0)
@@ -712,7 +715,6 @@ EV_API_DECL void ev_resume (EV_P) EV_NOEXCEPT;
#define ev_is_active(ev) (0 + ((ev_watcher *)(void *)(ev))->active) /* ro, true when the watcher has been started */
#define ev_can_stop(ev) (ev_is_pending(ev) || ev_is_active(ev)) /* ro, true when the watcher has been started */


#define ev_cb_(ev) (ev)->cb /* rw */
#define ev_cb(ev) (memmove (&ev_cb_ (ev), &((ev_watcher *)(ev))->cb, sizeof (ev_cb_ (ev))), (ev)->cb)

@@ -727,6 +729,7 @@ EV_API_DECL void ev_resume (EV_P) EV_NOEXCEPT;
#define ev_periodic_at(ev) (+((ev_watcher_time *)(ev))->at)

#ifndef ev_set_cb
/* memmove is used here to avoid strict aliasing violations, and hopefully is optimized out by any reasonable compiler */
# define ev_set_cb(ev,cb_) (ev_cb_ (ev) = (cb_), memmove (&((ev_watcher *)(ev))->cb, &ev_cb_ (ev), sizeof (ev_cb_ (ev))))
#endif
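The new EVBACKEND_LINUXAIO and EVBACKEND_IOURING bits can be requested explicitly when creating a loop. A usage sketch against the standard libev API (it assumes the backends were compiled in; if the kernel or build lacks them, loop creation simply fails and we fall back):

#include <stdio.h>
#include <ev.h>

int main (void)
{
  /* ask for io_uring first; ev_loop_new returns 0 if no requested backend works */
  struct ev_loop *loop = ev_loop_new (EVBACKEND_IOURING);

  if (!loop)
    loop = ev_loop_new (EVFLAG_AUTO); /* autodetect the recommended backend */

  if (!loop)
    return 1;

  printf ("backend: 0x%x, supported: 0x%x, recommended: 0x%x\n",
          ev_backend (loop), ev_supported_backends (), ev_recommended_backends ());

  ev_loop_destroy (loop);
  return 0;
}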


contrib/libev/ev_epoll.c  +41 -28

@@ -1,7 +1,7 @@
/*
* libev epoll fd activity backend
*
* Copyright (c) 2007,2008,2009,2010,2011 Marc Alexander Lehmann <libev@schmorp.de>
* Copyright (c) 2007,2008,2009,2010,2011,2016,2017,2019 Marc Alexander Lehmann <libev@schmorp.de>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modifica-
@@ -93,10 +93,10 @@ epoll_modify (EV_P_ int fd, int oev, int nev)
ev.events = (nev & EV_READ ? EPOLLIN : 0)
| (nev & EV_WRITE ? EPOLLOUT : 0);

if (expect_true (!epoll_ctl (backend_fd, oev && oldmask != nev ? EPOLL_CTL_MOD : EPOLL_CTL_ADD, fd, &ev)))
if (ecb_expect_true (!epoll_ctl (backend_fd, oev && oldmask != nev ? EPOLL_CTL_MOD : EPOLL_CTL_ADD, fd, &ev)))
return;

if (expect_true (errno == ENOENT))
if (ecb_expect_true (errno == ENOENT))
{
/* if ENOENT then the fd went away, so try to do the right thing */
if (!nev)
@@ -105,7 +105,7 @@ epoll_modify (EV_P_ int fd, int oev, int nev)
if (!epoll_ctl (backend_fd, EPOLL_CTL_ADD, fd, &ev))
return;
}
else if (expect_true (errno == EEXIST))
else if (ecb_expect_true (errno == EEXIST))
{
/* EEXIST means we ignored a previous DEL, but the fd is still active */
/* if the kernel mask is the same as the new mask, we assume it hasn't changed */
@@ -115,7 +115,7 @@ epoll_modify (EV_P_ int fd, int oev, int nev)
if (!epoll_ctl (backend_fd, EPOLL_CTL_MOD, fd, &ev))
return;
}
else if (expect_true (errno == EPERM))
else if (ecb_expect_true (errno == EPERM))
{
/* EPERM means the fd is always ready, but epoll is too snobbish */
/* to handle it, unlike select or poll. */
@@ -124,12 +124,14 @@ epoll_modify (EV_P_ int fd, int oev, int nev)
/* add fd to epoll_eperms, if not already inside */
if (!(oldmask & EV_EMASK_EPERM))
{
array_needsize (int, epoll_eperms, epoll_epermmax, epoll_epermcnt + 1, EMPTY2);
array_needsize (int, epoll_eperms, epoll_epermmax, epoll_epermcnt + 1, array_needsize_noinit);
epoll_eperms [epoll_epermcnt++] = fd;
}

return;
}
else
assert (("libev: I/O watcher with invalid fd found in epoll_ctl", errno != EBADF && errno != ELOOP && errno != EINVAL));

fd_kill (EV_A_ fd);

@@ -144,16 +146,16 @@ epoll_poll (EV_P_ ev_tstamp timeout)
int i;
int eventcnt;

if (expect_false (epoll_epermcnt))
timeout = 0.;
if (ecb_expect_false (epoll_epermcnt))
timeout = EV_TS_CONST (0.);

/* epoll wait times cannot be larger than (LONG_MAX - 999UL) / HZ msecs, which is below */
/* the default libev max wait time, however. */
EV_RELEASE_CB;
eventcnt = epoll_wait (backend_fd, epoll_events, epoll_eventmax, timeout * 1e3);
eventcnt = epoll_wait (backend_fd, epoll_events, epoll_eventmax, EV_TS_TO_MSEC (timeout));
EV_ACQUIRE_CB;

if (expect_false (eventcnt < 0))
if (ecb_expect_false (eventcnt < 0))
{
if (errno != EINTR)
ev_syserr ("(libev) epoll_wait");
@@ -176,14 +178,14 @@ epoll_poll (EV_P_ ev_tstamp timeout)
* other spurious notifications will be found by epoll_ctl, below
* we assume that fd is always in range, as we never shrink the anfds array
*/
if (expect_false ((uint32_t)anfds [fd].egen != (uint32_t)(ev->data.u64 >> 32)))
if (ecb_expect_false ((uint32_t)anfds [fd].egen != (uint32_t)(ev->data.u64 >> 32)))
{
/* recreate kernel state */
postfork |= 2;
continue;
}

if (expect_false (got & ~want))
if (ecb_expect_false (got & ~want))
{
anfds [fd].emask = want;

@@ -195,6 +197,8 @@ epoll_poll (EV_P_ ev_tstamp timeout)
* above with the gencounter check (== our fd is not the event fd), and
* partially here, when epoll_ctl returns an error (== a child has the fd
* but we closed it).
* note: for events such as POLLHUP, where we can't know whether it refers
* to EV_READ or EV_WRITE, we might issue redundant EPOLL_CTL_MOD calls.
*/
ev->events = (want & EV_READ ? EPOLLIN : 0)
| (want & EV_WRITE ? EPOLLOUT : 0);
@@ -212,7 +216,7 @@ epoll_poll (EV_P_ ev_tstamp timeout)
}

/* if the receive array was full, increase its size */
if (expect_false (eventcnt == epoll_eventmax))
if (ecb_expect_false (eventcnt == epoll_eventmax))
{
ev_free (epoll_events);
epoll_eventmax = array_nextsize (sizeof (struct epoll_event), epoll_eventmax, epoll_eventmax + 1);
@@ -235,23 +239,34 @@ epoll_poll (EV_P_ ev_tstamp timeout)
}
}

inline_size
int
epoll_init (EV_P_ int flags)
static int
epoll_epoll_create (void)
{
int fd;

#if defined EPOLL_CLOEXEC && !defined __ANDROID__
backend_fd = epoll_create1 (EPOLL_CLOEXEC);
fd = epoll_create1 (EPOLL_CLOEXEC);

if (backend_fd < 0 && (errno == EINVAL || errno == ENOSYS))
if (fd < 0 && (errno == EINVAL || errno == ENOSYS))
#endif
backend_fd = epoll_create (256);
{
fd = epoll_create (256);

if (backend_fd < 0)
return 0;
if (fd >= 0)
fcntl (fd, F_SETFD, FD_CLOEXEC);
}

return fd;
}

fcntl (backend_fd, F_SETFD, FD_CLOEXEC);
inline_size
int
epoll_init (EV_P_ int flags)
{
if ((backend_fd = epoll_epoll_create ()) < 0)
return 0;

backend_mintime = 1e-3; /* epoll does sometimes return early, this is just to avoid the worst */
backend_mintime = EV_TS_CONST (1e-3); /* epoll does sometimes return early, this is just to avoid the worst */
backend_modify = epoll_modify;
backend_poll = epoll_poll;

@@ -269,17 +284,15 @@ epoll_destroy (EV_P)
array_free (epoll_eperm, EMPTY);
}

inline_size
void
ecb_cold
static void
epoll_fork (EV_P)
{
close (backend_fd);

while ((backend_fd = epoll_create (256)) < 0)
while ((backend_fd = epoll_epoll_create ()) < 0)
ev_syserr ("(libev) epoll_create");

fcntl (backend_fd, F_SETFD, FD_CLOEXEC);

fd_rearm_all (EV_A);
}


contrib/libev/ev_iouring.c  +694 -0

@@ -0,0 +1,694 @@
/*
* libev linux io_uring fd activity backend
*
* Copyright (c) 2019-2020 Marc Alexander Lehmann <libev@schmorp.de>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modifica-
* tion, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER-
* CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
* EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-
* CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-
* ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Alternatively, the contents of this file may be used under the terms of
* the GNU General Public License ("GPL") version 2 or any later version,
* in which case the provisions of the GPL are applicable instead of
* the above. If you wish to allow the use of your version of this file
* only under the terms of the GPL and not to allow others to use your
* version of this file under the BSD license, indicate your decision
* by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL. If you do not delete the
* provisions above, a recipient may use your version of this file under
* either the BSD or the GPL.
*/

/*
* general notes about linux io_uring:
*
* a) it's the best interface I have seen so far. on linux.
* b) best is not necessarily very good.
* c) it's better than the aio mess, doesn't suffer from the fork problems
* of linux aio or epoll and so on and so on. and you could do event stuff
* without any syscalls. what's not to like?
* d) ok, it's vastly more complex, but that's ok, really.
* e) why two mmaps instead of one? one would be more space-efficient,
* and I can't see what benefit two would have (other than being
* somehow resizable/relocatable, but that's apparently not possible).
* f) hmm, it's practically undebuggable (gdb can't access the memory, and
* the bizarre way structure offsets are communicated makes it hard to
* just print the ring buffer heads, even *iff* the memory were visible
* in gdb. but then, that's also ok, really.
* g) well, you cannot specify a timeout when waiting for events. no,
* seriously, the interface doesn't support a timeout. never seen _that_
* before. sure, you can use a timerfd, but that's another syscall
* you could have avoided. overall, this bizarre omission smells
* like a µ-optimisation by the io_uring author for his personal
* applications, to the detriment of everybody else who just wants
* an event loop. but, umm, ok, if that's all, it could be worse.
* (from what I gather from the author Jens Axboe, it simply didn't
* occur to him, and he made good on it by adding an unlimited number
* of timeouts later :).
* h) initially there was a hardcoded limit of 4096 outstanding events.
* later versions not only bump this to 32k, but also can handle
* an unlimited amount of events, so this only affects the batch size.
* i) unlike linux aio, you *can* register more than the limit
* of fd events. while early versions of io_uring signalled an overflow
* and you ended up getting wet. 5.5+ does not do this anymore.
* j) but, oh my! it had exactly the same bugs as the linux aio backend,
* where some undocumented poll combinations just fail. fortunately,
* after finally reaching the author, he was more than willing to fix
* this probably in 5.6+.
* k) overall, the *API* itself is, I dare to say, not a total trainwreck.
* once the bugs are fixed (probably in 5.6+), it will be without
* competition.
*/

/* TODO: use internal TIMEOUT */
/* TODO: take advantage of single mmap, NODROP etc. */
/* TODO: resize cq/sq size independently */

#include <sys/timerfd.h>
#include <sys/mman.h>
#include <poll.h>
#include <stdint.h>

#define IOURING_INIT_ENTRIES 32

/*****************************************************************************/
/* syscall wrapdadoop - this section has the raw api/abi definitions */

#include <linux/fs.h>
#include <linux/types.h>

/* mostly directly taken from the kernel or documentation */

struct io_uring_sqe
{
__u8 opcode;
__u8 flags;
__u16 ioprio;
__s32 fd;
union {
__u64 off;
__u64 addr2;
};
__u64 addr;
__u32 len;
union {
__kernel_rwf_t rw_flags;
__u32 fsync_flags;
__u16 poll_events;
__u32 sync_range_flags;
__u32 msg_flags;
__u32 timeout_flags;
__u32 accept_flags;
__u32 cancel_flags;
__u32 open_flags;
__u32 statx_flags;
};
__u64 user_data;
union {
__u16 buf_index;
__u64 __pad2[3];
};
};

struct io_uring_cqe
{
__u64 user_data;
__s32 res;
__u32 flags;
};

struct io_sqring_offsets
{
__u32 head;
__u32 tail;
__u32 ring_mask;
__u32 ring_entries;
__u32 flags;
__u32 dropped;
__u32 array;
__u32 resv1;
__u64 resv2;
};

struct io_cqring_offsets
{
__u32 head;
__u32 tail;
__u32 ring_mask;
__u32 ring_entries;
__u32 overflow;
__u32 cqes;
__u64 resv[2];
};

struct io_uring_params
{
__u32 sq_entries;
__u32 cq_entries;
__u32 flags;
__u32 sq_thread_cpu;
__u32 sq_thread_idle;
__u32 features;
__u32 resv[4];
struct io_sqring_offsets sq_off;
struct io_cqring_offsets cq_off;
};

#define IORING_SETUP_CQSIZE 0x00000008

#define IORING_OP_POLL_ADD 6
#define IORING_OP_POLL_REMOVE 7
#define IORING_OP_TIMEOUT 11
#define IORING_OP_TIMEOUT_REMOVE 12

/* relative or absolute, reference clock is CLOCK_MONOTONIC */
struct iouring_kernel_timespec
{
int64_t tv_sec;
long long tv_nsec;
};

#define IORING_TIMEOUT_ABS 0x00000001

#define IORING_ENTER_GETEVENTS 0x01

#define IORING_OFF_SQ_RING 0x00000000ULL
#define IORING_OFF_CQ_RING 0x08000000ULL
#define IORING_OFF_SQES 0x10000000ULL

#define IORING_FEAT_SINGLE_MMAP 0x00000001
#define IORING_FEAT_NODROP 0x00000002
#define IORING_FEAT_SUBMIT_STABLE 0x00000004

inline_size
int
evsys_io_uring_setup (unsigned entries, struct io_uring_params *params)
{
return ev_syscall2 (SYS_io_uring_setup, entries, params);
}

inline_size
int
evsys_io_uring_enter (int fd, unsigned to_submit, unsigned min_complete, unsigned flags, const sigset_t *sig, size_t sigsz)
{
return ev_syscall6 (SYS_io_uring_enter, fd, to_submit, min_complete, flags, sig, sigsz);
}

/*****************************************************************************/
/* actual backed implementation */

/* we hope that volatile will make the compiler access these variables only once */
#define EV_SQ_VAR(name) *(volatile unsigned *)((char *)iouring_sq_ring + iouring_sq_ ## name)
#define EV_CQ_VAR(name) *(volatile unsigned *)((char *)iouring_cq_ring + iouring_cq_ ## name)

/* the index array */
#define EV_SQ_ARRAY ((unsigned *)((char *)iouring_sq_ring + iouring_sq_array))

/* the submit/completion queue entries */
#define EV_SQES ((struct io_uring_sqe *) iouring_sqes)
#define EV_CQES ((struct io_uring_cqe *)((char *)iouring_cq_ring + iouring_cq_cqes))

inline_speed
int
iouring_enter (EV_P_ ev_tstamp timeout)
{
int res;

EV_RELEASE_CB;

res = evsys_io_uring_enter (iouring_fd, iouring_to_submit, 1,
timeout > EV_TS_CONST (0.) ? IORING_ENTER_GETEVENTS : 0, 0, 0);

assert (("libev: io_uring_enter did not consume all sqes", (res < 0 || res == iouring_to_submit)));

iouring_to_submit = 0;

EV_ACQUIRE_CB;

return res;
}

/* TODO: can we move things around so we don't need this forward-reference? */
static void
iouring_poll (EV_P_ ev_tstamp timeout);

static
struct io_uring_sqe *
iouring_sqe_get (EV_P)
{
unsigned tail;
for (;;)
{
tail = EV_SQ_VAR (tail);

if (ecb_expect_true (tail + 1 - EV_SQ_VAR (head) <= EV_SQ_VAR (ring_entries)))
break; /* what's the problem, we have free sqes */

/* queue full, need to flush and possibly handle some events */

#if EV_FEATURE_CODE
/* first we ask the kernel nicely, most often this frees up some sqes */
int res = iouring_enter (EV_A_ EV_TS_CONST (0.));

ECB_MEMORY_FENCE_ACQUIRE; /* better safe than sorry */

if (res >= 0)
continue; /* yes, it worked, try again */
#endif

/* some problem, possibly EBUSY - do the full poll and let it handle any issues */

iouring_poll (EV_A_ EV_TS_CONST (0.));
/* iouring_poll should have done ECB_MEMORY_FENCE_ACQUIRE for us */
}

/*assert (("libev: io_uring queue full after flush", tail + 1 - EV_SQ_VAR (head) <= EV_SQ_VAR (ring_entries)));*/

return EV_SQES + (tail & EV_SQ_VAR (ring_mask));
}

inline_size
struct io_uring_sqe *
iouring_sqe_submit (EV_P_ struct io_uring_sqe *sqe)
{
unsigned idx = sqe - EV_SQES;

EV_SQ_ARRAY [idx] = idx;
ECB_MEMORY_FENCE_RELEASE;
++EV_SQ_VAR (tail);
/*ECB_MEMORY_FENCE_RELEASE; /* for the time being we assume this is not needed */
++iouring_to_submit;
}

/*****************************************************************************/

/* when the timerfd expires we simply note the fact,
* as the purpose of the timerfd is to wake us up, nothing else.
* the next iteration should re-set it.
*/
static void
iouring_tfd_cb (EV_P_ struct ev_io *w, int revents)
{
iouring_tfd_to = EV_TSTAMP_HUGE;
}

/* called for full and partial cleanup */
ecb_cold
static int
iouring_internal_destroy (EV_P)
{
close (iouring_tfd);
close (iouring_fd);

if (iouring_sq_ring != MAP_FAILED) munmap (iouring_sq_ring, iouring_sq_ring_size);
if (iouring_cq_ring != MAP_FAILED) munmap (iouring_cq_ring, iouring_cq_ring_size);
if (iouring_sqes != MAP_FAILED) munmap (iouring_sqes , iouring_sqes_size );

if (ev_is_active (&iouring_tfd_w))
{
ev_ref (EV_A);
ev_io_stop (EV_A_ &iouring_tfd_w);
}
}

ecb_cold
static int
iouring_internal_init (EV_P)
{
struct io_uring_params params = { 0 };

iouring_to_submit = 0;

iouring_tfd = -1;
iouring_sq_ring = MAP_FAILED;
iouring_cq_ring = MAP_FAILED;
iouring_sqes = MAP_FAILED;

if (!have_monotonic) /* cannot really happen, but what if11 */
return -1;

for (;;)
{
iouring_fd = evsys_io_uring_setup (iouring_entries, &params);

if (iouring_fd >= 0)
break; /* yippie */

if (errno != EINVAL)
return -1; /* we failed */

#if TODO
if ((~params.features) & (IORING_FEAT_NODROP | IORING_FEATURE_SINGLE_MMAP | IORING_FEAT_SUBMIT_STABLE))
return -1; /* we require the above features */
#endif

/* EINVAL: lots of possible reasons, but maybe
* it is because we hit the unqueryable hardcoded size limit
*/

/* we hit the limit already, give up */
if (iouring_max_entries)
return -1;

/* first time we hit EINVAL? assume we hit the limit, so go back and retry */
iouring_entries >>= 1;
iouring_max_entries = iouring_entries;
}

iouring_sq_ring_size = params.sq_off.array + params.sq_entries * sizeof (unsigned);
iouring_cq_ring_size = params.cq_off.cqes + params.cq_entries * sizeof (struct io_uring_cqe);
iouring_sqes_size = params.sq_entries * sizeof (struct io_uring_sqe);

iouring_sq_ring = mmap (0, iouring_sq_ring_size, PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_POPULATE, iouring_fd, IORING_OFF_SQ_RING);
iouring_cq_ring = mmap (0, iouring_cq_ring_size, PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_POPULATE, iouring_fd, IORING_OFF_CQ_RING);
iouring_sqes = mmap (0, iouring_sqes_size, PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_POPULATE, iouring_fd, IORING_OFF_SQES);

if (iouring_sq_ring == MAP_FAILED || iouring_cq_ring == MAP_FAILED || iouring_sqes == MAP_FAILED)
return -1;

iouring_sq_head = params.sq_off.head;
iouring_sq_tail = params.sq_off.tail;
iouring_sq_ring_mask = params.sq_off.ring_mask;
iouring_sq_ring_entries = params.sq_off.ring_entries;
iouring_sq_flags = params.sq_off.flags;
iouring_sq_dropped = params.sq_off.dropped;
iouring_sq_array = params.sq_off.array;

iouring_cq_head = params.cq_off.head;
iouring_cq_tail = params.cq_off.tail;
iouring_cq_ring_mask = params.cq_off.ring_mask;
iouring_cq_ring_entries = params.cq_off.ring_entries;
iouring_cq_overflow = params.cq_off.overflow;
iouring_cq_cqes = params.cq_off.cqes;

iouring_tfd = timerfd_create (CLOCK_MONOTONIC, TFD_CLOEXEC);

if (iouring_tfd < 0)
return iouring_tfd;

iouring_tfd_to = EV_TSTAMP_HUGE;

return 0;
}

ecb_cold
static void
iouring_fork (EV_P)
{
iouring_internal_destroy (EV_A);

while (iouring_internal_init (EV_A) < 0)
ev_syserr ("(libev) io_uring_setup");

fd_rearm_all (EV_A);

ev_io_stop (EV_A_ &iouring_tfd_w);
ev_io_set (EV_A_ &iouring_tfd_w, iouring_tfd, EV_READ);
ev_io_start (EV_A_ &iouring_tfd_w);
}

/*****************************************************************************/

static void
iouring_modify (EV_P_ int fd, int oev, int nev)
{
if (oev)
{
/* we assume the sqe's are all "properly" initialised */
struct io_uring_sqe *sqe = iouring_sqe_get (EV_A);
sqe->opcode = IORING_OP_POLL_REMOVE;
sqe->fd = fd;
/* Jens Axboe notified me that user_data is not what is documented, but is
* some kind of unique ID that has to match, otherwise the request cannot
* be removed. Since we don't *really* have that, we pass in the old
* generation counter - if that fails, too bad, it will hopefully be removed
* at close time and then be ignored. */
sqe->addr = (uint32_t)fd | ((__u64)(uint32_t)anfds [fd].egen << 32);
sqe->user_data = (uint64_t)-1;
iouring_sqe_submit (EV_A_ sqe);

/* increment generation counter to avoid handling old events */
++anfds [fd].egen;
}

if (nev)
{
struct io_uring_sqe *sqe = iouring_sqe_get (EV_A);
sqe->opcode = IORING_OP_POLL_ADD;
sqe->fd = fd;
sqe->addr = 0;
sqe->user_data = (uint32_t)fd | ((__u64)(uint32_t)anfds [fd].egen << 32);
sqe->poll_events =
(nev & EV_READ ? POLLIN : 0)
| (nev & EV_WRITE ? POLLOUT : 0);
iouring_sqe_submit (EV_A_ sqe);
}
}

inline_size
void
iouring_tfd_update (EV_P_ ev_tstamp timeout)
{
ev_tstamp tfd_to = mn_now + timeout;

/* we assume there will be many iterations per timer change, so
* we only re-set the timerfd when we have to because its expiry
* is too late.
*/
if (ecb_expect_false (tfd_to < iouring_tfd_to))
{
struct itimerspec its;

iouring_tfd_to = tfd_to;
EV_TS_SET (its.it_interval, 0.);
EV_TS_SET (its.it_value, tfd_to);

if (timerfd_settime (iouring_tfd, TFD_TIMER_ABSTIME, &its, 0) < 0)
assert (("libev: iouring timerfd_settime failed", 0));
}
}

inline_size
void
iouring_process_cqe (EV_P_ struct io_uring_cqe *cqe)
{
int fd = cqe->user_data & 0xffffffffU;
uint32_t gen = cqe->user_data >> 32;
int res = cqe->res;

/* user_data -1 is a remove that we are not atm. interested in */
if (cqe->user_data == (uint64_t)-1)
return;

assert (("libev: io_uring fd must be in-bounds", fd >= 0 && fd < anfdmax));

/* documentation lies, of course. the result value is NOT like
* normal syscalls, but like linux raw syscalls, i.e. negative
* error numbers. fortunate, as otherwise there would be no way
* to get error codes at all. still, why not document this?
*/

/* ignore event if generation doesn't match */
/* other than skipping removal events, */
/* this should actually be very rare */
if (ecb_expect_false (gen != (uint32_t)anfds [fd].egen))
return;

if (ecb_expect_false (res < 0))
{
/*TODO: EINVAL handling (was something failed with this fd)*/

if (res == -EBADF)
{
assert (("libev: event loop rejected bad fd", res != -EBADF));
fd_kill (EV_A_ fd);
}
else
{
errno = -res;
ev_syserr ("(libev) IORING_OP_POLL_ADD");
}

return;
}

/* feed events, we do not expect or handle POLLNVAL */
fd_event (
EV_A_
fd,
(res & (POLLOUT | POLLERR | POLLHUP) ? EV_WRITE : 0)
| (res & (POLLIN | POLLERR | POLLHUP) ? EV_READ : 0)
);

/* io_uring is oneshot, so we need to re-arm the fd next iteration */
/* this also means we usually have to do at least one syscall per iteration */
anfds [fd].events = 0;
fd_change (EV_A_ fd, EV_ANFD_REIFY);
}

/* called when the event queue overflows */
ecb_cold
static void
iouring_overflow (EV_P)
{
/* we have two options, resize the queue (by tearing down
* everything and recreating it, or living with it
* and polling.
* we implement this by resizing the queue, and, if that fails,
* we just recreate the state on every failure, which
* kind of is a very inefficient poll.
* one danger is, due to the bias toward lower fds,
* we will only really get events for those, so
* maybe we need a poll() fallback, after all.
*/
/*EV_CQ_VAR (overflow) = 0;*/ /* need to do this if we keep the state and poll manually */

fd_rearm_all (EV_A);

/* we double the size until we hit the hard-to-probe maximum */
if (!iouring_max_entries)
{
iouring_entries <<= 1;
iouring_fork (EV_A);
}
else
{
/* we hit the kernel limit, we should fall back to something else.
* we can either poll() a few times and hope for the best,
* poll always, or switch to epoll.
* TODO: is this necessary with newer kernels?
*/

iouring_internal_destroy (EV_A);

/* this should make it so that on return, we don't call any uring functions */
iouring_to_submit = 0;

for (;;)
{
backend = epoll_init (EV_A_ 0);

if (backend)
break;

ev_syserr ("(libev) iouring switch to epoll");
}
}
}

/* handle any events in the completion queue, return true if there were any */
static int
iouring_handle_cq (EV_P)
{
unsigned head, tail, mask;
head = EV_CQ_VAR (head);
ECB_MEMORY_FENCE_ACQUIRE;
tail = EV_CQ_VAR (tail);

if (head == tail)
return 0;

/* it can only overflow if we have events, yes, yes? */
if (ecb_expect_false (EV_CQ_VAR (overflow)))
{
iouring_overflow (EV_A);
return 1;
}

mask = EV_CQ_VAR (ring_mask);

do
iouring_process_cqe (EV_A_ &EV_CQES [head++ & mask]);
while (head != tail);

EV_CQ_VAR (head) = head;
ECB_MEMORY_FENCE_RELEASE;

return 1;
}

static void
iouring_poll (EV_P_ ev_tstamp timeout)
{
/* if we have events, no need for extra syscalls, but we might have to queue events */
/* we also clear the timeout if there are outstanding fdchanges */
/* the latter should only happen if both the sq and cq are full, most likely */
/* because we have a lot of event sources that immediately complete */
/* TODO: fdchangecnt is always 0 because fd_reify does not have two buffers yet */
if (iouring_handle_cq (EV_A) || fdchangecnt)
timeout = EV_TS_CONST (0.);
else
/* no events, so maybe wait for some */
iouring_tfd_update (EV_A_ timeout);

/* only enter the kernel if we have something to submit, or we need to wait */
if (timeout || iouring_to_submit)
{
int res = iouring_enter (EV_A_ timeout);

if (ecb_expect_false (res < 0))
if (errno == EINTR)
/* ignore */;
else if (errno == EBUSY)
/* cq full, cannot submit - should be rare because we flush the cq first, so simply ignore */;
else
ev_syserr ("(libev) iouring setup");
else
iouring_handle_cq (EV_A);
}
}

inline_size
int
iouring_init (EV_P_ int flags)
{
iouring_entries = IOURING_INIT_ENTRIES;
iouring_max_entries = 0;

if (iouring_internal_init (EV_A) < 0)
{
iouring_internal_destroy (EV_A);
return 0;
}

ev_io_init (&iouring_tfd_w, iouring_tfd_cb, iouring_tfd, EV_READ);
ev_set_priority (&iouring_tfd_w, EV_MINPRI);
ev_io_start (EV_A_ &iouring_tfd_w);
ev_unref (EV_A); /* watcher should not keep loop alive */

backend_modify = iouring_modify;
backend_poll = iouring_poll;

return EVBACKEND_IOURING;
}

inline_size
void
iouring_destroy (EV_P)
{
iouring_internal_destroy (EV_A);
}
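Since io_uring poll requests are identified only by their 64-bit user_data, the backend packs the fd into the low 32 bits and the per-fd generation counter (egen) into the high 32 bits, then drops completions whose generation no longer matches (the fd was closed or reused in the meantime). A standalone sketch of that scheme (helper names are illustrative, not libev API):

#include <stdint.h>

/* pack fd + generation the same way iouring_modify fills sqe->user_data */
static uint64_t pack_user_data (int fd, uint32_t egen)
{
  return (uint32_t)fd | ((uint64_t)egen << 32);
}

/* a completion is stale if the fd's generation has moved on since submission */
static int cqe_is_stale (uint64_t user_data, uint32_t current_egen)
{
  return (uint32_t)(user_data >> 32) != current_egen;
}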


contrib/libev/ev_kqueue.c  +15 -9

@@ -1,7 +1,7 @@
/*
* libev kqueue backend
*
* Copyright (c) 2007,2008,2009,2010,2011,2012,2013 Marc Alexander Lehmann <libev@schmorp.de>
* Copyright (c) 2007,2008,2009,2010,2011,2012,2013,2016,2019 Marc Alexander Lehmann <libev@schmorp.de>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modifica-
@@ -48,7 +48,7 @@ void
kqueue_change (EV_P_ int fd, int filter, int flags, int fflags)
{
++kqueue_changecnt;
array_needsize (struct kevent, kqueue_changes, kqueue_changemax, kqueue_changecnt, EMPTY2);
array_needsize (struct kevent, kqueue_changes, kqueue_changemax, kqueue_changecnt, array_needsize_noinit);

EV_SET (&kqueue_changes [kqueue_changecnt - 1], fd, filter, flags, fflags, 0, 0);
}
@@ -103,10 +103,10 @@ kqueue_poll (EV_P_ ev_tstamp timeout)
EV_ACQUIRE_CB;
kqueue_changecnt = 0;

if (expect_false (res < 0))
if (ecb_expect_false (res < 0))
{
if (errno != EINTR)
ev_syserr ("(libev) kevent");
ev_syserr ("(libev) kqueue kevent");

return;
}
@@ -115,7 +115,7 @@ kqueue_poll (EV_P_ ev_tstamp timeout)
{
int fd = kqueue_events [i].ident;

if (expect_false (kqueue_events [i].flags & EV_ERROR))
if (ecb_expect_false (kqueue_events [i].flags & EV_ERROR))
{
int err = kqueue_events [i].data;

@@ -129,10 +129,16 @@ kqueue_poll (EV_P_ ev_tstamp timeout)
if (fd_valid (fd))
kqueue_modify (EV_A_ fd, 0, anfds [fd].events);
else
fd_kill (EV_A_ fd);
{
assert (("libev: kqueue found invalid fd", 0));
fd_kill (EV_A_ fd);
}
}
else /* on all other errors, we error out on the fd */
fd_kill (EV_A_ fd);
{
assert (("libev: kqueue found invalid fd", 0));
fd_kill (EV_A_ fd);
}
}
}
else
@@ -145,7 +151,7 @@ kqueue_poll (EV_P_ ev_tstamp timeout)
);
}

if (expect_false (res == kqueue_eventmax))
if (ecb_expect_false (res == kqueue_eventmax))
{
ev_free (kqueue_events);
kqueue_eventmax = array_nextsize (sizeof (struct kevent), kqueue_eventmax, kqueue_eventmax + 1);
@@ -164,7 +170,7 @@ kqueue_init (EV_P_ int flags)

fcntl (backend_fd, F_SETFD, FD_CLOEXEC); /* not sure if necessary, hopefully doesn't hurt */

backend_mintime = 1e-9; /* apparently, they did the right thing in freebsd */
backend_mintime = EV_TS_CONST (1e-9); /* apparently, they did the right thing in freebsd */
backend_modify = kqueue_modify;
backend_poll = kqueue_poll;


contrib/libev/ev_linuxaio.c  +620 -0

@@ -0,0 +1,620 @@
/*
* libev linux aio fd activity backend
*
* Copyright (c) 2019 Marc Alexander Lehmann <libev@schmorp.de>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modifica-
* tion, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER-
* CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
* EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-
* CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-
* ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Alternatively, the contents of this file may be used under the terms of
* the GNU General Public License ("GPL") version 2 or any later version,
* in which case the provisions of the GPL are applicable instead of
* the above. If you wish to allow the use of your version of this file
* only under the terms of the GPL and not to allow others to use your
* version of this file under the BSD license, indicate your decision
* by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL. If you do not delete the
* provisions above, a recipient may use your version of this file under
* either the BSD or the GPL.
*/

/*
* general notes about linux aio:
*
* a) at first, the linux aio IOCB_CMD_POLL functionality introduced in
* 4.18 looks too good to be true: both watchers and events can be
* batched, and events can even be handled in userspace using
* a ring buffer shared with the kernel. watchers can be canceled
* regardless of whether the fd has been closed. no problems with fork.
* ok, the ring buffer is 200% undocumented (there isn't even a
* header file), but otherwise, it's pure bliss!
* b) ok, watchers are one-shot, so you have to re-arm active ones
* on every iteration. so much for syscall-less event handling,
* but at least these re-arms can be batched, no big deal, right?
* c) well, linux as usual: the documentation lies to you: io_submit
* sometimes returns EINVAL because the kernel doesn't feel like
* handling your poll mask - ttys can be polled for POLLOUT,
* POLLOUT|POLLIN, but polling for POLLIN fails. just great,
* so we have to fall back to something else (hello, epoll),
* but at least the fallback can be slow, because these are
* exceptional cases, right?
* d) hmm, you have to tell the kernel the maximum number of watchers
* you want to queue when initialising the aio context. but of
* course the real limit is magically calculated in the kernel, and
* is often higher than we asked for. so we just have to destroy
* the aio context and re-create it a bit larger if we hit the limit.
* (starts to remind you of epoll? well, it's a bit more deterministic
* and less gambling, but still ugly as hell).
* e) that's when you find out you can also hit an arbitrary system-wide
* limit. or the kernel simply doesn't want to handle your watchers.
* what the fuck do we do then? you guessed it, in the middle
* of event handling we have to switch to 100% epoll polling. and
* that better is as fast as normal epoll polling, so you practically
* have to use the normal epoll backend with all its quirks.
* f) end result of this train wreck: it inherits all the disadvantages
* from epoll, while adding a number on its own. why even bother to use
* it? because if conditions are right and your fds are supported and you
* don't hit a limit, this backend is actually faster, doesn't gamble with
* your fds, batches watchers and events and doesn't require costly state
* recreates. well, until it does.
* g) all of this makes this backend use almost twice as much code as epoll.
* which in turn uses twice as much code as poll. and that's not counting
* the fact that this backend also depends on the epoll backend, making
* it three times as much code as poll, or kqueue.
* h) bleah. why can't linux just do kqueue. sure kqueue is ugly, but by now
* it's clear that whatever linux comes up with is far, far, far worse.
*/

#include <sys/time.h> /* actually linux/time.h, but we must assume they are compatible */
#include <poll.h>
#include <linux/aio_abi.h>

/*****************************************************************************/
/* syscall wrapdadoop - this section has the raw api/abi definitions */

#include <sys/syscall.h> /* no glibc wrappers */

/* aio_abi.h is not versioned in any way, so we cannot test for its existence */
#define IOCB_CMD_POLL 5

/* taken from linux/fs/aio.c. yup, that's a .c file.
* not only is this totally undocumented, not even the source code
* can tell you what the future semantics of compat_features and
* incompat_features are, or what header_length actually is for.
*/
#define AIO_RING_MAGIC 0xa10a10a1
#define EV_AIO_RING_INCOMPAT_FEATURES 0
struct aio_ring
{
unsigned id; /* kernel internal index number */
unsigned nr; /* number of io_events */
unsigned head; /* Written to by userland or by kernel. */
unsigned tail;

unsigned magic;
unsigned compat_features;
unsigned incompat_features;
unsigned header_length; /* size of aio_ring */

struct io_event io_events[0];
};

inline_size
int
evsys_io_setup (unsigned nr_events, aio_context_t *ctx_idp)
{
return ev_syscall2 (SYS_io_setup, nr_events, ctx_idp);
}

inline_size
int
evsys_io_destroy (aio_context_t ctx_id)
{
return ev_syscall1 (SYS_io_destroy, ctx_id);
}

inline_size
int
evsys_io_submit (aio_context_t ctx_id, long nr, struct iocb *cbp[])
{
return ev_syscall3 (SYS_io_submit, ctx_id, nr, cbp);
}

inline_size
int
evsys_io_cancel (aio_context_t ctx_id, struct iocb *cbp, struct io_event *result)
{
return ev_syscall3 (SYS_io_cancel, ctx_id, cbp, result);
}

inline_size
int
evsys_io_getevents (aio_context_t ctx_id, long min_nr, long nr, struct io_event *events, struct timespec *timeout)
{
return ev_syscall5 (SYS_io_getevents, ctx_id, min_nr, nr, events, timeout);
}

/*****************************************************************************/
/* actual backed implementation */

ecb_cold
static int
linuxaio_nr_events (EV_P)
{
/* we start with 16 iocbs and increase from there
* that's tiny, but the kernel has a rather low system-wide
* limit that can be reached quickly, so let's be parsimonious
* with this resource.
* Rest assured, the kernel generously rounds up small and big numbers
* in different ways (but doesn't seem to charge you for it).
* The 15 here is because the kernel usually has a power of two as aio-max-nr,
* and this helps to take advantage of that limit.
*/

/* we try to fill 4kB pages exactly.
* the ring buffer header is 32 bytes, every io event is 32 bytes.
* the kernel takes the io requests number, doubles it, adds 2
* and adds the ring buffer.
* the way we use this is by starting low, and then roughly doubling the
* size each time we hit a limit.
*/

int requests = 15 << linuxaio_iteration;
int one_page = (4096
/ sizeof (struct io_event) ) / 2; /* how many fit into one page */
int first_page = ((4096 - sizeof (struct aio_ring))
/ sizeof (struct io_event) - 2) / 2; /* how many fit into the first page */

/* if everything fits into one page, use count exactly */
if (requests > first_page)
/* otherwise, round down to full pages and add the first page */
requests = requests / one_page * one_page + first_page;

return requests;
}

/* we use our own wrapper structure in case we ever want to do something "clever" */
typedef struct aniocb
{
struct iocb io;
/*int inuse;*/
} *ANIOCBP;

inline_size
void
linuxaio_array_needsize_iocbp (ANIOCBP *base, int offset, int count)
{
while (count--)
{
/* TODO: quite the overhead to allocate every iocb separately, maybe use our own allocator? */
ANIOCBP iocb = (ANIOCBP)ev_malloc (sizeof (*iocb));

/* full zero initialise is probably not required at the moment, but
* this is not well documented, so we better do it.
*/
memset (iocb, 0, sizeof (*iocb));

iocb->io.aio_lio_opcode = IOCB_CMD_POLL;
iocb->io.aio_fildes = offset;

base [offset++] = iocb;
}
}

ecb_cold
static void
linuxaio_free_iocbp (EV_P)
{
while (linuxaio_iocbpmax--)
ev_free (linuxaio_iocbps [linuxaio_iocbpmax]);

linuxaio_iocbpmax = 0; /* next resize will completely reallocate the array, at some overhead */
}

static void
linuxaio_modify (EV_P_ int fd, int oev, int nev)
{
array_needsize (ANIOCBP, linuxaio_iocbps, linuxaio_iocbpmax, fd + 1, linuxaio_array_needsize_iocbp);
ANIOCBP iocb = linuxaio_iocbps [fd];
ANFD *anfd = &anfds [fd];

if (ecb_expect_false (iocb->io.aio_reqprio < 0))
{
/* we handed this fd over to epoll, so undo this first */
/* we do it manually because the optimisations on epoll_modify won't do us any good */
epoll_ctl (backend_fd, EPOLL_CTL_DEL, fd, 0);
anfd->emask = 0;
iocb->io.aio_reqprio = 0;
}
else if (ecb_expect_false (iocb->io.aio_buf))
{
/* iocb active, so cancel it first before resubmit */
/* this assumes we only ever get one call per fd per loop iteration */
for (;;)
{
/* on all relevant kernels, io_cancel fails with EINPROGRESS on "success" */
if (ecb_expect_false (evsys_io_cancel (linuxaio_ctx, &iocb->io, (struct io_event *)0) == 0))
break;

if (ecb_expect_true (errno == EINPROGRESS))
break;

/* the EINPROGRESS test is for nicer error message. clumsy. */
if (errno != EINTR)
{
assert (("libev: linuxaio unexpected io_cancel failed", errno != EINTR && errno != EINPROGRESS));
break;
}
}

/* increment generation counter to avoid handling old events */
++anfd->egen;
}

iocb->io.aio_buf = (nev & EV_READ ? POLLIN : 0)
| (nev & EV_WRITE ? POLLOUT : 0);

if (nev)
{
iocb->io.aio_data = (uint32_t)fd | ((__u64)(uint32_t)anfd->egen << 32);

/* queue iocb up for io_submit */
/* this assumes we only ever get one call per fd per loop iteration */
++linuxaio_submitcnt;
array_needsize (struct iocb *, linuxaio_submits, linuxaio_submitmax, linuxaio_submitcnt, array_needsize_noinit);
linuxaio_submits [linuxaio_submitcnt - 1] = &iocb->io;
}
}

static void
linuxaio_epoll_cb (EV_P_ struct ev_io *w, int revents)
{
epoll_poll (EV_A_ 0);
}

inline_speed
void
linuxaio_fd_rearm (EV_P_ int fd)
{
anfds [fd].events = 0;
linuxaio_iocbps [fd]->io.aio_buf = 0;
fd_change (EV_A_ fd, EV_ANFD_REIFY);
}

static void
linuxaio_parse_events (EV_P_ struct io_event *ev, int nr)
{
while (nr)
{
int fd = ev->data & 0xffffffff;
uint32_t gen = ev->data >> 32;
int res = ev->res;

assert (("libev: iocb fd must be in-bounds", fd >= 0 && fd < anfdmax));

/* only accept events if generation counter matches */
if (ecb_expect_true (gen == (uint32_t)anfds [fd].egen))
{
/* feed events, we do not expect or handle POLLNVAL */
fd_event (
EV_A_
fd,
(res & (POLLOUT | POLLERR | POLLHUP) ? EV_WRITE : 0)
| (res & (POLLIN | POLLERR | POLLHUP) ? EV_READ : 0)
);

/* linux aio is oneshot: rearm fd. TODO: this does more work than strictly needed */
linuxaio_fd_rearm (EV_A_ fd);
}

--nr;
++ev;
}
}

/* get any events from ring buffer, return true if any were handled */
static int
linuxaio_get_events_from_ring (EV_P)
{
struct aio_ring *ring = (struct aio_ring *)linuxaio_ctx;
unsigned head, tail;

/* the kernel reads and writes both of these variables, */
/* as a C extension, we assume that volatile use here */
/* both makes reads atomic and once-only */
head = *(volatile unsigned *)&ring->head;
ECB_MEMORY_FENCE_ACQUIRE;
tail = *(volatile unsigned *)&ring->tail;

if (head == tail)
return 0;

/* parse all available events, but only once, to avoid starvation */
if (ecb_expect_true (tail > head)) /* normal case around */
linuxaio_parse_events (EV_A_ ring->io_events + head, tail - head);
else /* wrapped around */
{
linuxaio_parse_events (EV_A_ ring->io_events + head, ring->nr - head);
linuxaio_parse_events (EV_A_ ring->io_events, tail);
}

ECB_MEMORY_FENCE_RELEASE;
/* as an extension to C, we hope that the volatile will make this atomic and once-only */
*(volatile unsigned *)&ring->head = tail;

return 1;
}

inline_size
int
linuxaio_ringbuf_valid (EV_P)
{
struct aio_ring *ring = (struct aio_ring *)linuxaio_ctx;

return ecb_expect_true (ring->magic == AIO_RING_MAGIC)
&& ring->incompat_features == EV_AIO_RING_INCOMPAT_FEATURES
&& ring->header_length == sizeof (struct aio_ring); /* TODO: or use it to find io_event[0]? */
}

/* read at least one event from kernel, or timeout */
inline_size
void
linuxaio_get_events (EV_P_ ev_tstamp timeout)
{
struct timespec ts;
struct io_event ioev[8]; /* 256 octet stack space */
int want = 1; /* how many events to request */
int ringbuf_valid = linuxaio_ringbuf_valid (EV_A);

if (ecb_expect_true (ringbuf_valid))
{
/* if the ring buffer has any events, we don't wait or call the kernel at all */
if (linuxaio_get_events_from_ring (EV_A))
return;

/* if the ring buffer is empty, and we don't have a timeout, then don't call the kernel */
if (!timeout)
return;
}
else
/* no ringbuffer, request slightly larger batch */
want = sizeof (ioev) / sizeof (ioev [0]);

/* no events, so wait for some
* for fairness reasons, we do this in a loop, to fetch all events
*/
for (;;)
{
int res;

EV_RELEASE_CB;

EV_TS_SET (ts, timeout);
res = evsys_io_getevents (linuxaio_ctx, 1, want, ioev, &ts);

EV_ACQUIRE_CB;

if (res < 0)
if (errno == EINTR)
/* ignored, retry */;
else
ev_syserr ("(libev) linuxaio io_getevents");
else if (res)
{
/* at least one event available, handle them */
linuxaio_parse_events (EV_A_ ioev, res);

if (ecb_expect_true (ringbuf_valid))
{
/* if we have a ring buffer, handle any remaining events in it */
linuxaio_get_events_from_ring (EV_A);

/* at this point, we should have handled all outstanding events */
break;
}
else if (res < want)
/* otherwise, if there were fewer events than we wanted, we assume there are no more */
break;
}
else
break; /* no events from the kernel, we are done */

timeout = EV_TS_CONST (0.); /* only wait in the first iteration */
}
}

inline_size
int
linuxaio_io_setup (EV_P)
{
linuxaio_ctx = 0;
return evsys_io_setup (linuxaio_nr_events (EV_A), &linuxaio_ctx);
}

static void
linuxaio_poll (EV_P_ ev_tstamp timeout)
{
int submitted;

/* first phase: submit new iocbs */

/* io_submit might return less than the requested number of iocbs */
/* this is, afaics, only because of errors, but we go by the book and use a loop, */
/* which allows us to pinpoint the erroneous iocb */
for (submitted = 0; submitted < linuxaio_submitcnt; )
{
int res = evsys_io_submit (linuxaio_ctx, linuxaio_submitcnt - submitted, linuxaio_submits + submitted);

if (ecb_expect_false (res < 0))
if (errno == EINVAL)
{
/* This happens for unsupported fds, officially, but in my testing,
* also randomly happens for supported fds. We fall back to good old
* poll() here, under the assumption that this is a very rare case.
* See https://lore.kernel.org/patchwork/patch/1047453/ to see
* discussion about such a case (ttys) where polling for POLLIN
* fails but POLLIN|POLLOUT works.
*/
struct iocb *iocb = linuxaio_submits [submitted];
epoll_modify (EV_A_ iocb->aio_fildes, 0, anfds [iocb->aio_fildes].events);
iocb->aio_reqprio = -1; /* mark iocb as epoll */

res = 1; /* skip this iocb - another iocb, another chance */
}
else if (errno == EAGAIN)
{
/* This happens when the ring buffer is full, or some other shit we
* don't know and isn't documented. Most likely because we have too
* many requests and linux aio can't be assed to handle them.
* In this case, we try to allocate a larger ring buffer, freeing
* ours first. This might fail, in which case we have to fall back to 100%
* epoll.
* God, how I hate linux not getting its act together. Ever.
*/
evsys_io_destroy (linuxaio_ctx);
linuxaio_submitcnt = 0;

/* rearm all fds with active iocbs */
{
int fd;
for (fd = 0; fd < linuxaio_iocbpmax; ++fd)
if (linuxaio_iocbps [fd]->io.aio_buf)
linuxaio_fd_rearm (EV_A_ fd);
}

++linuxaio_iteration;
if (linuxaio_io_setup (EV_A) < 0)
{
/* TODO: rearm all and recreate epoll backend from scratch */
/* TODO: might be more prudent? */

/* too bad, we can't get a new aio context, go 100% epoll */
linuxaio_free_iocbp (EV_A);
ev_io_stop (EV_A_ &linuxaio_epoll_w);
ev_ref (EV_A);
linuxaio_ctx = 0;

backend = EVBACKEND_EPOLL;
backend_modify = epoll_modify;
backend_poll = epoll_poll;
}

timeout = EV_TS_CONST (0.);
/* it's easiest to handle this mess in another iteration */
return;
}
else if (errno == EBADF)
{
assert (("libev: event loop rejected bad fd", errno != EBADF));
fd_kill (EV_A_ linuxaio_submits [submitted]->aio_fildes);

res = 1; /* skip this iocb */
}
else if (errno == EINTR) /* not seen in reality, not documented */
res = 0; /* silently ignore and retry */
else
{
ev_syserr ("(libev) linuxaio io_submit");
res = 0;
}

submitted += res;
}

linuxaio_submitcnt = 0;

/* second phase: fetch and parse events */

linuxaio_get_events (EV_A_ timeout);
}

inline_size
int
linuxaio_init (EV_P_ int flags)
{
/* would be great to have a nice test for IOCB_CMD_POLL instead */
/* also: test some semi-common fd types, such as files and ttys in recommended_backends */
/* 4.18 introduced IOCB_CMD_POLL, 4.19 made epoll work, and we need that */
if (ev_linux_version () < 0x041300)
return 0;

if (!epoll_init (EV_A_ 0))
return 0;

linuxaio_iteration = 0;

if (linuxaio_io_setup (EV_A) < 0)
{
epoll_destroy (EV_A);
return 0;
}

ev_io_init (&linuxaio_epoll_w, linuxaio_epoll_cb, backend_fd, EV_READ);
ev_set_priority (&linuxaio_epoll_w, EV_MAXPRI);
ev_io_start (EV_A_ &linuxaio_epoll_w);
ev_unref (EV_A); /* watcher should not keep loop alive */

backend_modify = linuxaio_modify;
backend_poll = linuxaio_poll;

linuxaio_iocbpmax = 0;
linuxaio_iocbps = 0;

linuxaio_submits = 0;
linuxaio_submitmax = 0;
linuxaio_submitcnt = 0;

return EVBACKEND_LINUXAIO;
}

inline_size
void
linuxaio_destroy (EV_P)
{
epoll_destroy (EV_A);
linuxaio_free_iocbp (EV_A);
evsys_io_destroy (linuxaio_ctx); /* fails in child, aio context is destroyed */
}

ecb_cold
static void
linuxaio_fork (EV_P)
{
linuxaio_submitcnt = 0; /* all pointers were invalidated */
linuxaio_free_iocbp (EV_A); /* this frees all iocbs, which is very heavy-handed */
evsys_io_destroy (linuxaio_ctx); /* fails in child, aio context is destroyed */

linuxaio_iteration = 0; /* we start over in the child */

while (linuxaio_io_setup (EV_A) < 0)
ev_syserr ("(libev) linuxaio io_setup");

/* forking epoll should also effectively unregister all fds from the backend */
epoll_fork (EV_A);
/* epoll_fork already did this. hopefully */
/*fd_rearm_all (EV_A);*/

ev_io_stop (EV_A_ &linuxaio_epoll_w);
ev_io_set (EV_A_ &linuxaio_epoll_w, backend_fd, EV_READ);
ev_io_start (EV_A_ &linuxaio_epoll_w);
}
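linuxaio_init refuses to run on kernels older than 4.19 (0x041300 in libev's packed major/minor/patch encoding), since IOCB_CMD_POLL appeared in 4.18 and the epoll fallback only became workable in 4.19. A sketch of that encoding based on uname(2) (this helper is illustrative, not libev's internal ev_linux_version):

#include <stdio.h>
#include <sys/utsname.h>

/* encode "a.b.c" as 0xAABBCC, so 4.19.0 becomes 0x041300 */
static unsigned int kernel_version (void)
{
  struct utsname u;
  unsigned int a = 0, b = 0, c = 0;

  if (uname (&u) < 0)
    return 0;

  sscanf (u.release, "%u.%u.%u", &a, &b, &c);
  return (a << 16) | (b << 8) | c;
}

/* usage: if (kernel_version () < 0x041300) skip the linuxaio backend */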


contrib/libev/ev_poll.c  +19 -14

@@ -1,7 +1,7 @@
/*
* libev poll fd activity backend
*
* Copyright (c) 2007,2008,2009,2010,2011 Marc Alexander Lehmann <libev@schmorp.de>
* Copyright (c) 2007,2008,2009,2010,2011,2016,2019 Marc Alexander Lehmann <libev@schmorp.de>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modifica-
@@ -41,10 +41,12 @@

inline_size
void
pollidx_init (int *base, int count)
array_needsize_pollidx (int *base, int offset, int count)
{
/* consider using memset (.., -1, ...), which is practically guaranteed
* to work on all systems implementing poll */
/* using memset (.., -1, ...) is tempting, but we try
* to be ultraportable
*/
base += offset;
while (count--)
*base++ = -1;
}
@@ -57,14 +59,14 @@ poll_modify (EV_P_ int fd, int oev, int nev)
if (oev == nev)
return;

array_needsize (int, pollidxs, pollidxmax, fd + 1, pollidx_init);
array_needsize (int, pollidxs, pollidxmax, fd + 1, array_needsize_pollidx);

idx = pollidxs [fd];

if (idx < 0) /* need to allocate a new pollfd */
{
pollidxs [fd] = idx = pollcnt++;
array_needsize (struct pollfd, polls, pollmax, pollcnt, EMPTY2);
array_needsize (struct pollfd, polls, pollmax, pollcnt, array_needsize_noinit);
polls [idx].fd = fd;
}

@@ -78,7 +80,7 @@ poll_modify (EV_P_ int fd, int oev, int nev)
{
pollidxs [fd] = -1;

if (expect_true (idx < --pollcnt))
if (ecb_expect_true (idx < --pollcnt))
{
polls [idx] = polls [pollcnt];
pollidxs [polls [idx].fd] = idx;
@@ -93,10 +95,10 @@ poll_poll (EV_P_ ev_tstamp timeout)
int res;
EV_RELEASE_CB;
res = poll (polls, pollcnt, timeout * 1e3);
res = poll (polls, pollcnt, EV_TS_TO_MSEC (timeout));
EV_ACQUIRE_CB;

if (expect_false (res < 0))
if (ecb_expect_false (res < 0))
{
if (errno == EBADF)
fd_ebadf (EV_A);
@@ -108,14 +110,17 @@ poll_poll (EV_P_ ev_tstamp timeout)
else
for (p = polls; res; ++p)
{
assert (("libev: poll() returned illegal result, broken BSD kernel?", p < polls + pollcnt));
assert (("libev: poll returned illegal result, broken BSD kernel?", p < polls + pollcnt));

if (expect_false (p->revents)) /* this expect is debatable */
if (ecb_expect_false (p->revents)) /* this expect is debatable */
{
--res;

if (expect_false (p->revents & POLLNVAL))
fd_kill (EV_A_ p->fd);
if (ecb_expect_false (p->revents & POLLNVAL))
{
assert (("libev: poll found invalid fd in poll set", 0));
fd_kill (EV_A_ p->fd);
}
else
fd_event (
EV_A_
@@ -131,7 +136,7 @@ inline_size
int
poll_init (EV_P_ int flags)
{
backend_mintime = 1e-3;
backend_mintime = EV_TS_CONST (1e-3);
backend_modify = poll_modify;
backend_poll = poll_poll;
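
The EV_TS_CONST / EV_TS_TO_MSEC / EV_TS_FROM_USEC helpers that replace the bare 1e3 / 1e-3 arithmetic in this and the following backend hunks are defined in ev.c and depend on the timestamp representation (EV_TSTAMP_T). As a rough illustration of the conversions they perform for the default double timestamps (made-up names, not the real definitions):

/* illustrative only -- not libev's actual macros */
#include <stdio.h>

#define DEMO_TS_CONST(nv)     (nv)                  /* wrap a timestamp constant */
#define DEMO_TS_TO_MSEC(a)    ((a) * 1e3 + 0.9999)  /* seconds to ms, rounded up */
#define DEMO_TS_FROM_USEC(us) ((us) * 1e-6)         /* microseconds to seconds   */

int
main (void)
{
  printf ("%d\n", (int)DEMO_TS_TO_MSEC (0.0015));   /* prints 2, not 1 */
  return 0;
}

Rounding the millisecond value up rather than truncating presumably keeps poll()/Sleep() from returning a fraction of a millisecond early and spinning.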


+ 8
- 5
contrib/libev/ev_port.c View File

@@ -1,7 +1,7 @@
/*
* libev solaris event port backend
*
* Copyright (c) 2007,2008,2009,2010,2011 Marc Alexander Lehmann <libev@schmorp.de>
* Copyright (c) 2007,2008,2009,2010,2011,2019 Marc Alexander Lehmann <libev@schmorp.de>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modifica-
@@ -69,7 +69,10 @@ port_associate_and_check (EV_P_ int fd, int ev)
)
{
if (errno == EBADFD)
fd_kill (EV_A_ fd);
{
assert (("libev: port_associate found invalid fd", errno != EBADFD));
fd_kill (EV_A_ fd);
}
else
ev_syserr ("(libev) port_associate");
}
@@ -129,7 +132,7 @@ port_poll (EV_P_ ev_tstamp timeout)
}
}

if (expect_false (nget == port_eventmax))
if (ecb_expect_false (nget == port_eventmax))
{
ev_free (port_events);
port_eventmax = array_nextsize (sizeof (port_event_t), port_eventmax, port_eventmax + 1);
@@ -151,11 +154,11 @@ port_init (EV_P_ int flags)

/* if my reading of the opensolaris kernel sources is correct, then
* opensolaris does something very stupid: it checks if the time has already
* elapsed and doesn't round up if that is the case,m otherwise it DOES round
* elapsed and doesn't round up if that is the case, otherwise it DOES round
* up. Since we can't know what the case is, we need to guess by using a
* "large enough" timeout. Normally, 1e-9 would be correct.
*/
backend_mintime = 1e-3; /* needed to compensate for port_getn returning early */
backend_mintime = EV_TS_CONST (1e-3); /* needed to compensate for port_getn returning early */
backend_modify = port_modify;
backend_poll = port_poll;


+ 6
- 6
contrib/libev/ev_select.c View File

@@ -108,7 +108,7 @@ select_modify (EV_P_ int fd, int oev, int nev)
int word = fd / NFDBITS;
fd_mask mask = 1UL << (fd % NFDBITS);

if (expect_false (vec_max <= word))
if (ecb_expect_false (vec_max <= word))
{
int new_max = word + 1;

@@ -171,7 +171,7 @@ select_poll (EV_P_ ev_tstamp timeout)
#endif
EV_ACQUIRE_CB;

if (expect_false (res < 0))
if (ecb_expect_false (res < 0))
{
#if EV_SELECT_IS_WINSOCKET
errno = WSAGetLastError ();
@@ -197,7 +197,7 @@ select_poll (EV_P_ ev_tstamp timeout)
{
if (timeout)
{
unsigned long ms = timeout * 1e3;
unsigned long ms = EV_TS_TO_MSEC (timeout);
Sleep (ms ? ms : 1);
}

@@ -236,7 +236,7 @@ select_poll (EV_P_ ev_tstamp timeout)
if (FD_ISSET (handle, (fd_set *)vec_eo)) events |= EV_WRITE;
#endif

if (expect_true (events))
if (ecb_expect_true (events))
fd_event (EV_A_ fd, events);
}
}
@@ -262,7 +262,7 @@ select_poll (EV_P_ ev_tstamp timeout)
events |= word_r & mask ? EV_READ : 0;
events |= word_w & mask ? EV_WRITE : 0;

if (expect_true (events))
if (ecb_expect_true (events))
fd_event (EV_A_ word * NFDBITS + bit, events);
}
}
@@ -275,7 +275,7 @@ inline_size
int
select_init (EV_P_ int flags)
{
backend_mintime = 1e-6;
backend_mintime = EV_TS_CONST (1e-6);
backend_modify = select_modify;
backend_poll = select_poll;


+ 46
- 1
contrib/libev/ev_vars.h View File

@@ -1,7 +1,7 @@
/*
* loop member variable declarations
*
* Copyright (c) 2007,2008,2009,2010,2011,2012,2013 Marc Alexander Lehmann <libev@schmorp.de>
* Copyright (c) 2007,2008,2009,2010,2011,2012,2013,2019 Marc Alexander Lehmann <libev@schmorp.de>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modifica-
@@ -107,6 +107,46 @@ VARx(int, epoll_epermcnt)
VARx(int, epoll_epermmax)
#endif

#if EV_USE_LINUXAIO || EV_GENWRAP
VARx(aio_context_t, linuxaio_ctx)
VARx(int, linuxaio_iteration)
VARx(struct aniocb **, linuxaio_iocbps)
VARx(int, linuxaio_iocbpmax)
VARx(struct iocb **, linuxaio_submits)
VARx(int, linuxaio_submitcnt)
VARx(int, linuxaio_submitmax)
VARx(ev_io, linuxaio_epoll_w)
#endif

#if EV_USE_IOURING || EV_GENWRAP
VARx(int, iouring_fd)
VARx(unsigned, iouring_to_submit);
VARx(int, iouring_entries)
VARx(int, iouring_max_entries)
VARx(void *, iouring_sq_ring)
VARx(void *, iouring_cq_ring)
VARx(void *, iouring_sqes)
VARx(uint32_t, iouring_sq_ring_size)
VARx(uint32_t, iouring_cq_ring_size)
VARx(uint32_t, iouring_sqes_size)
VARx(uint32_t, iouring_sq_head)
VARx(uint32_t, iouring_sq_tail)
VARx(uint32_t, iouring_sq_ring_mask)
VARx(uint32_t, iouring_sq_ring_entries)
VARx(uint32_t, iouring_sq_flags)
VARx(uint32_t, iouring_sq_dropped)
VARx(uint32_t, iouring_sq_array)
VARx(uint32_t, iouring_cq_head)
VARx(uint32_t, iouring_cq_tail)
VARx(uint32_t, iouring_cq_ring_mask)
VARx(uint32_t, iouring_cq_ring_entries)
VARx(uint32_t, iouring_cq_overflow)
VARx(uint32_t, iouring_cq_cqes)
VARx(ev_tstamp, iouring_tfd_to)
VARx(int, iouring_tfd)
VARx(ev_io, iouring_tfd_w)
#endif

#if EV_USE_KQUEUE || EV_GENWRAP
VARx(pid_t, kqueue_fd_pid)
VARx(struct kevent *, kqueue_changes)
@@ -187,6 +227,11 @@ VARx(ev_io, sigfd_w)
VARx(sigset_t, sigfd_set)
#endif

#if EV_USE_TIMERFD || EV_GENWRAP
VARx(int, timerfd) /* timerfd for time jump detection */
VARx(ev_io, timerfd_w)
#endif

VARx(unsigned int, origflags) /* original loop flags */

#if EV_FEATURE_API || EV_GENWRAP
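
The new timerfd member above is what 4.33 uses for wall-clock jump detection on Linux. As a standalone sketch of the kernel mechanism it builds on (this is not libev's code, and it assumes a kernel/glibc that provide TFD_TIMER_CANCEL_ON_SET): a CLOCK_REALTIME timerfd armed with that flag makes the next read() fail with ECANCELED whenever the system clock is set, so an event loop knows to reschedule its periodic timers.

/* standalone demonstration of TFD_TIMER_CANCEL_ON_SET, not libev code */
#include <sys/timerfd.h>
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int
main (void)
{
  int fd = timerfd_create (CLOCK_REALTIME, TFD_CLOEXEC);
  struct itimerspec its = { { 0, 0 }, { 0, 0 } };
  uint64_t ticks;

  if (fd < 0)
    return 1;

  /* arm an absolute timer far in the future; we only care about the
   * cancellation notification generated when the realtime clock is set */
  its.it_value.tv_sec = 0x7fffffff;
  timerfd_settime (fd, TFD_TIMER_ABSTIME | TFD_TIMER_CANCEL_ON_SET, &its, 0);

  /* blocks until someone calls settimeofday()/clock_settime() */
  if (read (fd, &ticks, sizeof (ticks)) < 0 && errno == ECANCELED)
    puts ("realtime clock was changed; periodic timers need rescheduling");

  close (fd);
  return 0;
}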

+ 2
- 2
contrib/libev/ev_win32.c View File

@@ -154,8 +154,8 @@ ev_time (void)
ui.u.LowPart = ft.dwLowDateTime;
ui.u.HighPart = ft.dwHighDateTime;

/* msvc cannot convert ulonglong to double... yes, it is that sucky */
return (LONGLONG)(ui.QuadPart - 116444736000000000) * 1e-7;
/* also, msvc cannot convert ulonglong to double... yes, it is that sucky */
return EV_TS_FROM_USEC (((LONGLONG)(ui.QuadPart - 116444736000000000) * 1e-1));
}

#endif

+ 72
- 0
contrib/libev/ev_wrap.h View File

@@ -44,12 +44,46 @@
#define invoke_cb ((loop)->invoke_cb)
#define io_blocktime ((loop)->io_blocktime)
#define iocp ((loop)->iocp)
#define iouring_cq_cqes ((loop)->iouring_cq_cqes)
#define iouring_cq_head ((loop)->iouring_cq_head)
#define iouring_cq_overflow ((loop)->iouring_cq_overflow)
#define iouring_cq_ring ((loop)->iouring_cq_ring)
#define iouring_cq_ring_entries ((loop)->iouring_cq_ring_entries)
#define iouring_cq_ring_mask ((loop)->iouring_cq_ring_mask)
#define iouring_cq_ring_size ((loop)->iouring_cq_ring_size)
#define iouring_cq_tail ((loop)->iouring_cq_tail)
#define iouring_entries ((loop)->iouring_entries)
#define iouring_fd ((loop)->iouring_fd)
#define iouring_max_entries ((loop)->iouring_max_entries)
#define iouring_sq_array ((loop)->iouring_sq_array)
#define iouring_sq_dropped ((loop)->iouring_sq_dropped)
#define iouring_sq_flags ((loop)->iouring_sq_flags)
#define iouring_sq_head ((loop)->iouring_sq_head)
#define iouring_sq_ring ((loop)->iouring_sq_ring)
#define iouring_sq_ring_entries ((loop)->iouring_sq_ring_entries)
#define iouring_sq_ring_mask ((loop)->iouring_sq_ring_mask)
#define iouring_sq_ring_size ((loop)->iouring_sq_ring_size)
#define iouring_sq_tail ((loop)->iouring_sq_tail)
#define iouring_sqes ((loop)->iouring_sqes)
#define iouring_sqes_size ((loop)->iouring_sqes_size)
#define iouring_tfd ((loop)->iouring_tfd)
#define iouring_tfd_to ((loop)->iouring_tfd_to)
#define iouring_tfd_w ((loop)->iouring_tfd_w)
#define iouring_to_submit ((loop)->iouring_to_submit)
#define kqueue_changecnt ((loop)->kqueue_changecnt)
#define kqueue_changemax ((loop)->kqueue_changemax)
#define kqueue_changes ((loop)->kqueue_changes)
#define kqueue_eventmax ((loop)->kqueue_eventmax)
#define kqueue_events ((loop)->kqueue_events)
#define kqueue_fd_pid ((loop)->kqueue_fd_pid)
#define linuxaio_ctx ((loop)->linuxaio_ctx)
#define linuxaio_epoll_w ((loop)->linuxaio_epoll_w)
#define linuxaio_iocbpmax ((loop)->linuxaio_iocbpmax)
#define linuxaio_iocbps ((loop)->linuxaio_iocbps)
#define linuxaio_iteration ((loop)->linuxaio_iteration)
#define linuxaio_submitcnt ((loop)->linuxaio_submitcnt)
#define linuxaio_submitmax ((loop)->linuxaio_submitmax)
#define linuxaio_submits ((loop)->linuxaio_submits)
#define loop_count ((loop)->loop_count)
#define loop_depth ((loop)->loop_depth)
#define loop_done ((loop)->loop_done)
@@ -89,6 +123,8 @@
#define sigfd_w ((loop)->sigfd_w)
#define timeout_blocktime ((loop)->timeout_blocktime)
#define timercnt ((loop)->timercnt)
#define timerfd ((loop)->timerfd)
#define timerfd_w ((loop)->timerfd_w)
#define timermax ((loop)->timermax)
#define timers ((loop)->timers)
#define userdata ((loop)->userdata)
@@ -143,12 +179,46 @@
#undef invoke_cb
#undef io_blocktime
#undef iocp
#undef iouring_cq_cqes
#undef iouring_cq_head
#undef iouring_cq_overflow
#undef iouring_cq_ring
#undef iouring_cq_ring_entries
#undef iouring_cq_ring_mask
#undef iouring_cq_ring_size
#undef iouring_cq_tail
#undef iouring_entries
#undef iouring_fd
#undef iouring_max_entries
#undef iouring_sq_array
#undef iouring_sq_dropped
#undef iouring_sq_flags
#undef iouring_sq_head
#undef iouring_sq_ring
#undef iouring_sq_ring_entries
#undef iouring_sq_ring_mask
#undef iouring_sq_ring_size
#undef iouring_sq_tail
#undef iouring_sqes
#undef iouring_sqes_size
#undef iouring_tfd
#undef iouring_tfd_to
#undef iouring_tfd_w
#undef iouring_to_submit
#undef kqueue_changecnt
#undef kqueue_changemax
#undef kqueue_changes
#undef kqueue_eventmax
#undef kqueue_events
#undef kqueue_fd_pid
#undef linuxaio_ctx
#undef linuxaio_epoll_w
#undef linuxaio_iocbpmax
#undef linuxaio_iocbps
#undef linuxaio_iteration
#undef linuxaio_submitcnt
#undef linuxaio_submitmax
#undef linuxaio_submits
#undef loop_count
#undef loop_depth
#undef loop_done
@@ -188,6 +258,8 @@
#undef sigfd_w
#undef timeout_blocktime
#undef timercnt
#undef timerfd
#undef timerfd_w
#undef timermax
#undef timers
#undef userdata
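
The long lists of VARx() declarations and matching #define/#undef pairs above follow libev's usual multiplicity scheme: ev_vars.h declares each loop member once, and ev_wrap.h maps the bare name onto ((loop)->name) so the backend code reads the same with and without EV_MULTIPLICITY. A self-contained sketch of the pattern, with made-up names:

/* made-up names; only the mechanism mirrors ev_vars.h / ev_wrap.h */
#include <stdio.h>

#define DEMO_VARS \
  VARx (int, timerfd) \
  VARx (unsigned, iouring_to_submit)

/* expand the declaration list as struct members (cf. ev.c including ev_vars.h) */
#define VARx(type, name) type name;
struct demo_loop { DEMO_VARS };
#undef VARx

/* wrap the bare names, as ev_wrap.h does, so backend code needs no `loop->` */
#define timerfd           ((loop)->timerfd)
#define iouring_to_submit ((loop)->iouring_to_submit)

static void
demo_backend_init (struct demo_loop *loop)
{
  timerfd = -1;           /* expands to ((loop)->timerfd) = -1 */
  iouring_to_submit = 0;
}

/* and undefine them again, as the second half of ev_wrap.h does */
#undef timerfd
#undef iouring_to_submit

int
main (void)
{
  struct demo_loop l;
  demo_backend_init (&l);
  printf ("%d %u\n", l.timerfd, l.iouring_to_submit);
  return 0;
}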

+ 8
- 0
src/libserver/cfg_utils.c View File

@@ -2631,6 +2631,14 @@ rspamd_config_ev_backend_to_string (int ev_backend, gboolean *effective)
}

if (ev_backend & EVBACKEND_EPOLL) {
if (ev_backend & EVBACKEND_IOURING) {
SET_EFFECTIVE (TRUE);
return "epoll+io_uring";
}
if (ev_backend & EVBACKEND_LINUXAIO) {
SET_EFFECTIVE (TRUE);
return "epoll+aio";
}
SET_EFFECTIVE (TRUE);
return "epoll";
}
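
For illustration only (not the rspamd function itself): rspamd passes its configured backend mask to rspamd_config_ev_backend_to_string(), and masks that combine epoll with the new io_uring or linux-aio backends are now reported as "epoll+io_uring" or "epoll+aio". Using only libev's public constants, the new branches behave like this:

/* illustrative re-statement of the mapping above, not rspamd code */
#include <ev.h>
#include <stdio.h>

static const char *
demo_backend_name (unsigned int mask)
{
  if (mask & EVBACKEND_EPOLL)
    {
      if (mask & EVBACKEND_IOURING)  return "epoll+io_uring";
      if (mask & EVBACKEND_LINUXAIO) return "epoll+aio";
      return "epoll";
    }

  return "other";
}

int
main (void)
{
  printf ("%s\n", demo_backend_name (EVBACKEND_EPOLL | EVBACKEND_IOURING));  /* epoll+io_uring */
  printf ("%s\n", demo_backend_name (EVBACKEND_EPOLL | EVBACKEND_LINUXAIO)); /* epoll+aio */
  printf ("%s\n", demo_backend_name (EVBACKEND_EPOLL));                      /* epoll */
  return 0;
}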
