author    Vsevolod Stakhov <vsevolod@highsecure.ru>  2020-03-24 16:36:43 +0000
committer Vsevolod Stakhov <vsevolod@highsecure.ru>  2020-03-24 16:36:43 +0000
commit    4610fcee939fb6646703285d2da3b47a111d58eb (patch)
tree      6b01864ccea44b37e6efc2553691b9e9dc6a1fa8
parent    47ecc8cf045b157ed8b7f9cc74ba08516ccc6164 (diff)
[Feature] Update libev to 4.33
Changelog: http://cvs.schmorp.de/libev/Changes?pathrev=rel-4_33
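
The headline additions in libev 4.33 are two experimental Linux backends, `EVBACKEND_LINUXAIO` and `EVBACKEND_IOURING`, plus timerfd-based detection of wall-clock jumps (all visible in the `ev.c` diff below). Neither new backend is returned by `ev_recommended_backends ()` by default, so callers have to request one explicitly. A minimal sketch of how an application could opt in, using only the standard libev API (the explicit fallback chain is illustrative, not part of this commit):

```c
#include <ev.h>
#include <stdio.h>

int
main (void)
{
  /* request the new io_uring backend explicitly; ev_loop_new () returns 0
   * if the requested backend cannot be initialised (e.g. an old kernel),
   * in which case we fall back to the recommended backends */
  struct ev_loop *loop = ev_loop_new (EVBACKEND_IOURING);

  if (!loop)
    loop = ev_loop_new (EVFLAG_AUTO);

  printf ("selected backend: 0x%x\n", ev_backend (loop));

  ev_loop_destroy (loop);
  return 0;
}
```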
 contrib/libev/CMakeLists.txt |   8
 contrib/libev/config.h.in    |  19
 contrib/libev/ev.c           | 987
 contrib/libev/ev.h           |  61
 contrib/libev/ev_epoll.c     |  69
 contrib/libev/ev_iouring.c   | 694
 contrib/libev/ev_kqueue.c    |  24
 contrib/libev/ev_linuxaio.c  | 620
 contrib/libev/ev_poll.c      |  33
 contrib/libev/ev_port.c      |  13
 contrib/libev/ev_select.c    |  12
 contrib/libev/ev_vars.h      |  47
 contrib/libev/ev_win32.c     |   4
 contrib/libev/ev_wrap.h      |  72
 src/libserver/cfg_utils.c    |   8
15 files changed, 2309 insertions, 362 deletions
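
Much of the `ev.c` churn below implements the timerfd-based time-jump detection: starting the first `ev_periodic` now lazily creates a `TFD_TIMER_CANCEL_ON_SET` timerfd (see `evtimerfd_init`/`timerfdcb` in the diff), so the loop is woken immediately when the realtime clock is set and can reschedule periodics, instead of capping every wait at `MAX_BLOCKTIME`. A small sketch of a watcher that benefits, again using only the standard libev API (the one-minute interval is an arbitrary choice):

```c
#include <ev.h>
#include <stdio.h>

static void
minute_cb (EV_P_ ev_periodic *w, int revents)
{
  /* fires at every full wall-clock minute; with libev 4.33 a system
   * clock change reschedules this promptly via the timerfd watcher */
  printf ("minute tick, ev_now = %f\n", ev_now (EV_A));
}

int
main (void)
{
  struct ev_loop *loop = EV_DEFAULT;
  ev_periodic tick;

  ev_periodic_init (&tick, minute_cb, 0., 60., 0); /* offset 0, interval 60s */
  ev_periodic_start (loop, &tick);

  ev_run (loop, 0);
  return 0;
}
```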
diff --git a/contrib/libev/CMakeLists.txt b/contrib/libev/CMakeLists.txt
index 591166a96..e81aaee3f 100644
--- a/contrib/libev/CMakeLists.txt
+++ b/contrib/libev/CMakeLists.txt
@@ -9,8 +9,12 @@ CHECK_INCLUDE_FILES(sys/stat.h HAVE_SYS_STAT_H)
 CHECK_INCLUDE_FILES(sys/signalfd.h HAVE_SYS_SIGNALFD_H)
 CHECK_INCLUDE_FILES(port.h HAVE_PORT_H)
 CHECK_INCLUDE_FILES(poll.h HAVE_POLL_H)
+CHECK_INCLUDE_FILES(memory.h HAVE_MEMORY_H)
 CHECK_INCLUDE_FILES(sys/select.h HAVE_SYS_SELECT_H)
 CHECK_INCLUDE_FILES(sys/eventfd.h HAVE_SYS_EVENTFD_H)
+CHECK_INCLUDE_FILES(sys/timerfd.h HAVE_SYS_TIMERFD_H)
+CHECK_INCLUDE_FILES(linux/fs.h HAVE_LINUX_FS_H)
+CHECK_INCLUDE_FILES(linux/aio_abi.h HAVE_LINUX_AIO_ABI_H)
 
 IF(HAVE_SYS_INOTIFY_H)
     CHECK_SYMBOL_EXISTS(inotify_init "sys/types.h;sys/inotify.h" HAVE_INOTIFY_INIT)
@@ -36,7 +40,9 @@ ENDIF()
 IF(HAVE_SYS_SIGNALFD_H)
     CHECK_SYMBOL_EXISTS(signalfd sys/signalfd.h HAVE_EVENTFD)
 ENDIF()
-
+IF(HAVE_LINUX_FS_H)
+    CHECK_SYMBOL_EXISTS(RWF_SUPPORTED linux/fs.h HAVE_KERNEL_RWF_T)
+ENDIF()
 CHECK_SYMBOL_EXISTS(time.h nanosleep HAVE_NANOSLEEP)
 
 # check first without rt
diff --git a/contrib/libev/config.h.in b/contrib/libev/config.h.in
index 2bc23a75a..10fc6271a 100644
--- a/contrib/libev/config.h.in
+++ b/contrib/libev/config.h.in
@@ -24,12 +24,21 @@
 /* Define to 1 if you have the <inttypes.h> header file. */
 #cmakedefine HAVE_INTTYPES_H 1
 
+/* Define to 1 if linux/fs.h defined kernel_rwf_t */
+#cmakedefine HAVE_KERNEL_RWF_T 1
+
 /* Define to 1 if you have the `kqueue' function. */
 #cmakedefine HAVE_KQUEUE 1
 
 /* Define to 1 if you have the `rt' library (-lrt). */
 #cmakedefine HAVE_LIBRT 1
 
+/* Define to 1 if you have the <linux/aio_abi.h> header file. */
+#cmakedefine HAVE_LINUX_AIO_ABI_H 1
+
+/* Define to 1 if you have the <linux/fs.h> header file. */
+#cmakedefine HAVE_LINUX_FS_H 1
+
 /* Define to 1 if you have the <memory.h> header file. */
 #cmakedefine HAVE_MEMORY_H 1
 
@@ -87,18 +96,14 @@
 /* Define to 1 if you have the <sys/stat.h> header file. */
 #cmakedefine HAVE_SYS_STAT_H 1
 
+/* Define to 1 if you have the <sys/timerfd.h> header file. */
+#cmakedefine HAVE_SYS_TIMERFD_H 1
+
 /* Define to 1 if you have the <sys/types.h> header file. */
 #cmakedefine HAVE_SYS_TYPES_H 1
 
 /* Define to 1 if you have the <unistd.h> header file. */
 #cmakedefine HAVE_UNISTD_H 1
 
-/* Define to the version of this package. */
-#define PACKAGE_VERSION 4.25
-
 /* Define to 1 if you have the ANSI C header files. */
 #define STDC_HEADERS 1
-
-/* Version number of package */
-#undef VERSION
diff --git a/contrib/libev/ev.c b/contrib/libev/ev.c
index de32e1781..297fa7e1b 100644
--- a/contrib/libev/ev.c
+++ b/contrib/libev/ev.c
@@ -1,7 +1,7 @@
 /*
  * libev event processing core, watcher management
  *
- * Copyright (c) 2007-2018 Marc Alexander Lehmann <libev@schmorp.de>
+ * Copyright (c) 2007-2019 Marc Alexander Lehmann <libev@schmorp.de>
  * All rights reserved.
  *
 * Redistribution and use in source and binary forms, with or without modifica-
@@ -47,7 +47,6 @@
 
 #ifdef __GNUC__
 #pragma GCC diagnostic ignored "-Wunused-value"
-#pragma GCC diagnostic ignored "-Wstrict-aliasing"
 #endif
 
 # if HAVE_FLOOR
@@ -122,6 +121,24 @@
 #  define EV_USE_EPOLL 0
 # endif
 
+# if HAVE_LINUX_AIO_ABI_H
+#  ifndef EV_USE_LINUXAIO
+#   define EV_USE_LINUXAIO 0 /* was: EV_FEATURE_BACKENDS, always off by default */
+#  endif
+# else
+#  undef EV_USE_LINUXAIO
+#  define EV_USE_LINUXAIO 0
+# endif
+
+# if HAVE_LINUX_FS_H && HAVE_SYS_TIMERFD_H && HAVE_KERNEL_RWF_T
+#  ifndef EV_USE_IOURING
+#   define EV_USE_IOURING EV_FEATURE_BACKENDS
+#  endif
+# else
+#  undef EV_USE_IOURING
+#  define EV_USE_IOURING 0
+# endif
+
 # if HAVE_KQUEUE && HAVE_SYS_EVENT_H
 #  ifndef EV_USE_KQUEUE
 #   define EV_USE_KQUEUE EV_FEATURE_BACKENDS
@@ -167,6 +184,15 @@
 #  define EV_USE_EVENTFD 0
 # endif
 
+# if HAVE_SYS_TIMERFD_H
+#  ifndef EV_USE_TIMERFD
+#   define EV_USE_TIMERFD EV_FEATURE_OS
+#  endif
+# else
+#  undef EV_USE_TIMERFD
+#  define EV_USE_TIMERFD 0
+# endif
+
 #endif
 
 /* OS X, in its infinite idiocy, actually HARDCODES
@@ -322,6 +348,22 @@
 # define EV_USE_PORT 0
 #endif
 
+#ifndef EV_USE_LINUXAIO
+# if __linux /* libev currently assumes linux/aio_abi.h is always available on linux */
+#  define EV_USE_LINUXAIO 0 /* was: 1, always off by default */
+# else
+#  define EV_USE_LINUXAIO 0
+# endif
+#endif
+
+#ifndef EV_USE_IOURING
+# if __linux /* later checks might disable again */
+#  define EV_USE_IOURING 1
+# else
+#  define EV_USE_IOURING 0
+# endif
+#endif
+
 #ifndef EV_USE_INOTIFY
 # if __linux && (__GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 4))
 #  define EV_USE_INOTIFY EV_FEATURE_OS
@@ -354,6 +396,14 @@
 # endif
 #endif
 
+#ifndef EV_USE_TIMERFD
+# if __linux && (__GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 8))
+#  define EV_USE_TIMERFD EV_FEATURE_OS
+# else
+#  define EV_USE_TIMERFD 0
+# endif
+#endif
+
 #if 0 /* debugging */
 # define EV_VERIFY 3
 # define EV_USE_4HEAP 1
@@ -396,6 +446,7 @@
 #  define clock_gettime(id, ts) syscall (SYS_clock_gettime, (id), (ts))
 #  undef EV_USE_MONOTONIC
 #  define EV_USE_MONOTONIC 1
+#  define EV_NEED_SYSCALL 1
 # else
 #  undef EV_USE_CLOCK_SYSCALL
 #  define EV_USE_CLOCK_SYSCALL 0
@@ -419,6 +470,14 @@
 # define EV_USE_INOTIFY 0
 #endif
 
+#if __linux && EV_USE_IOURING
+# include <linux/version.h>
+# if LINUX_VERSION_CODE < KERNEL_VERSION(4,14,0)
+#  undef EV_USE_IOURING
+#  define EV_USE_IOURING 0
+# endif
+#endif
+
 #if !EV_USE_NANOSLEEP
 /* hp-ux has it in sys/time.h, which we unconditionally include above */
 # if !defined _WIN32 && !defined __hpux
@@ -426,6 +485,31 @@
 # endif
 #endif
 
+#if EV_USE_LINUXAIO
+# include <sys/syscall.h>
+# if SYS_io_getevents && EV_USE_EPOLL /* linuxaio backend requires epoll backend */
+#  define EV_NEED_SYSCALL 1
+# else
+#  undef EV_USE_LINUXAIO
+#  define EV_USE_LINUXAIO 0
+# endif
+#endif
+
+#if EV_USE_IOURING
+# include <sys/syscall.h>
+# if !SYS_io_uring_setup && __linux && !__alpha
+#  define SYS_io_uring_setup     425
+#  define SYS_io_uring_enter     426
+#  define SYS_io_uring_wregister 427
+# endif
+# if SYS_io_uring_setup && EV_USE_EPOLL /* iouring backend requires epoll backend */
+#  define EV_NEED_SYSCALL 1
+# else
+#  undef EV_USE_IOURING
+#  define EV_USE_IOURING 0
+# endif
+#endif
+
 #if EV_USE_INOTIFY
 # include <sys/statfs.h>
 # include <sys/inotify.h>
@@ -437,7 +521,7 @@
 #endif
 
 #if EV_USE_EVENTFD
-/* our minimum requirement is glibc 2.7 which has the stub, but not the header */
+/* our minimum requirement is glibc 2.7 which has the stub, but not the full header */
 # include <stdint.h>
 # ifndef EFD_NONBLOCK
 #  define EFD_NONBLOCK O_NONBLOCK
@@ -453,7 +537,7 @@ EV_CPP(extern "C") int (eventfd) (unsigned int initval, int flags);
 #endif
 
 #if EV_USE_SIGNALFD
-/* our minimum requirement is glibc 2.7 which has the stub, but not the header */
+/* our minimum requirement is glibc 2.7 which has the stub, but not the full header */
 # include <stdint.h>
 # ifndef SFD_NONBLOCK
 #  define SFD_NONBLOCK O_NONBLOCK
@@ -465,7 +549,7 @@ EV_CPP(extern "C") int (eventfd) (unsigned int initval, int flags);
 #  define SFD_CLOEXEC 02000000
 # endif
 # endif
-EV_CPP (extern "C") int signalfd (int fd, const sigset_t *mask, int flags);
+EV_CPP (extern "C") int (signalfd) (int fd, const sigset_t *mask, int flags);
 
 struct signalfd_siginfo
 {
@@ -474,7 +558,17 @@ struct signalfd_siginfo
 };
 #endif
 
-/**/
+/* for timerfd, libev core requires TFD_TIMER_CANCEL_ON_SET &c */
+#if EV_USE_TIMERFD
+# include <sys/timerfd.h>
+/* timerfd is only used for periodics */
+# if !(defined (TFD_TIMER_CANCEL_ON_SET) && defined (TFD_CLOEXEC) && defined (TFD_NONBLOCK)) || !EV_PERIODIC_ENABLE
+#  undef EV_USE_TIMERFD
+#  define EV_USE_TIMERFD 0
+# endif
+#endif
+
+/*****************************************************************************/
 
 #if EV_VERIFY >= 3
 # define EV_FREQUENT_CHECK ev_verify (EV_A)
@@ -487,22 +581,36 @@ struct signalfd_siginfo
  * This value is good at least till the year 4000.
  */
 #define MIN_INTERVAL  0.0001220703125 /* 1/2**13, good till 4000 */
-#if 0
-#define MIN_INTERVAL  0.00000095367431640625 /* 1/2**20, good till 2200 */
+// #define MIN_INTERVAL  0.00000095367431640625 /* 1/2**20, good till 2200 */
+
+#define MIN_TIMEJUMP   1. /* minimum timejump that gets detected (if monotonic clock available) */
+#define MAX_BLOCKTIME  59.743 /* never wait longer than this time (to detect time jumps) */
+#define MAX_BLOCKTIME2 1500001.07 /* same, but when timerfd is used to detect jumps, also safe delay to not overflow */
+
+/* find a portable timestamp that is "always" in the future but fits into time_t.
+ * this is quite hard, and we are mostly guessing - we handle 32 bit signed/unsigned time_t,
+ * and sizes larger than 32 bit, and maybe the unlikely floating point time_t */
+#define EV_TSTAMP_HUGE \
+  (sizeof (time_t) >= 8     ? 10000000000000. \
+   : 0 < (time_t)4294967295 ?     4294967295. \
+   :                             2147483647.) \
+
+#ifndef EV_TS_CONST
+# define EV_TS_CONST(nv) nv
+# define EV_TS_TO_MSEC(a) a * 1e3 + 0.9999
+# define EV_TS_FROM_USEC(us) us * 1e-6
+# define EV_TV_SET(tv,t) do { tv.tv_sec = (long)t; tv.tv_usec = (long)((t - tv.tv_sec) * 1e6); } while (0)
+# define EV_TS_SET(ts,t) do { ts.tv_sec = (long)t; ts.tv_nsec = (long)((t - ts.tv_sec) * 1e9); } while (0)
+# define EV_TV_GET(tv) ((tv).tv_sec + (tv).tv_usec * 1e-6)
+# define EV_TS_GET(ts) ((ts).tv_sec + (ts).tv_nsec * 1e-9)
 #endif
 
-#define MIN_TIMEJUMP  1. /* minimum timejump that gets detected (if monotonic clock available) */
-#define MAX_BLOCKTIME 59.743 /* never wait longer than this time (to detect time jumps) */
-
-#define EV_TV_SET(tv,t) do { tv.tv_sec = (long)t; tv.tv_usec = (long)((t - tv.tv_sec) * 1e6); } while (0)
-#define EV_TS_SET(ts,t) do { ts.tv_sec = (long)t; ts.tv_nsec = (long)((t - ts.tv_sec) * 1e9); } while (0)
-
 /* the following is ecb.h embedded into libev - use update_ev_c to update from an external copy */
 /* ECB.H BEGIN */
 /*
  * libecb - http://software.schmorp.de/pkg/libecb
  *
- * Copyright (©) 2009-2015 Marc Alexander Lehmann <libecb@schmorp.de>
+ * Copyright (©) 2009-2015,2018-2020 Marc Alexander Lehmann <libecb@schmorp.de>
  * Copyright (©) 2011 Emanuele Giaquinta
  * All rights reserved.
  *
@@ -543,15 +651,23 @@ struct signalfd_siginfo
 #define ECB_H
 
 /* 16 bits major, 16 bits minor */
-#define ECB_VERSION 0x00010005
+#define ECB_VERSION 0x00010008
 
-#ifdef _WIN32
+#include <string.h> /* for memcpy */
+
+#if defined (_WIN32) && !defined (__MINGW32__)
   typedef signed char int8_t;
   typedef unsigned char uint8_t;
+  typedef signed char int_fast8_t;
+  typedef unsigned char uint_fast8_t;
   typedef signed short int16_t;
   typedef unsigned short uint16_t;
+  typedef signed int int_fast16_t;
+  typedef unsigned int uint_fast16_t;
   typedef signed int int32_t;
   typedef unsigned int uint32_t;
+  typedef signed int int_fast32_t;
+  typedef unsigned int uint_fast32_t;
   #if __GNUC__
     typedef signed long long int64_t;
     typedef unsigned long long uint64_t;
@@ -559,6 +675,8 @@ struct signalfd_siginfo
     typedef signed __int64 int64_t;
     typedef unsigned __int64 uint64_t;
   #endif
+  typedef int64_t int_fast64_t;
+  typedef uint64_t uint_fast64_t;
   #ifdef _WIN64
     #define ECB_PTRSIZE 8
     typedef uint64_t uintptr_t;
@@ -580,6 +698,14 @@ struct signalfd_siginfo
 #define ECB_GCC_AMD64 (__amd64 || __amd64__ || __x86_64 || __x86_64__)
 #define ECB_MSVC_AMD64 (_M_AMD64 || _M_X64)
 
+#ifndef ECB_OPTIMIZE_SIZE
+  #if __OPTIMIZE_SIZE__
+    #define ECB_OPTIMIZE_SIZE 1
+  #else
+    #define ECB_OPTIMIZE_SIZE 0
+  #endif
+#endif
+
 /* work around x32 idiocy by defining proper macros */
 #if ECB_GCC_AMD64 || ECB_MSVC_AMD64
   #if _ILP32
@@ -667,6 +793,7 @@ struct signalfd_siginfo
 #ifndef ECB_MEMORY_FENCE
   #if ECB_GCC_VERSION(2,5) || defined __INTEL_COMPILER || (__llvm__ && __GNUC__) || __SUNPRO_C >= 0x5110 || __SUNPRO_CC >= 0x5110
+    #define ECB_MEMORY_FENCE_RELAXED __asm__ __volatile__ ("" : : : "memory")
     #if __i386 || __i386__
       #define ECB_MEMORY_FENCE __asm__ __volatile__ ("lock; orb $0, -1(%%esp)" : : : "memory")
       #define ECB_MEMORY_FENCE_ACQUIRE __asm__ __volatile__ ("" : : : "memory")
@@ -726,12 +853,14 @@ struct signalfd_siginfo
     #define ECB_MEMORY_FENCE __atomic_thread_fence (__ATOMIC_SEQ_CST)
     #define ECB_MEMORY_FENCE_ACQUIRE __atomic_thread_fence (__ATOMIC_ACQUIRE)
     #define ECB_MEMORY_FENCE_RELEASE __atomic_thread_fence (__ATOMIC_RELEASE)
+    #define ECB_MEMORY_FENCE_RELAXED __atomic_thread_fence (__ATOMIC_RELAXED)
 
   #elif ECB_CLANG_EXTENSION(c_atomic)
     /* see comment below (stdatomic.h) about the C11 memory model. */
     #define ECB_MEMORY_FENCE __c11_atomic_thread_fence (__ATOMIC_SEQ_CST)
     #define ECB_MEMORY_FENCE_ACQUIRE __c11_atomic_thread_fence (__ATOMIC_ACQUIRE)
     #define ECB_MEMORY_FENCE_RELEASE __c11_atomic_thread_fence (__ATOMIC_RELEASE)
+    #define ECB_MEMORY_FENCE_RELAXED __c11_atomic_thread_fence (__ATOMIC_RELAXED)
 
   #elif ECB_GCC_VERSION(4,4) || defined __INTEL_COMPILER || defined __clang__
     #define ECB_MEMORY_FENCE __sync_synchronize ()
@@ -751,9 +880,10 @@ struct signalfd_siginfo
     #define ECB_MEMORY_FENCE MemoryBarrier () /* actually just xchg on x86... scary */
   #elif __SUNPRO_C >= 0x5110 || __SUNPRO_CC >= 0x5110
     #include <mbarrier.h>
-    #define ECB_MEMORY_FENCE __machine_rw_barrier ()
-    #define ECB_MEMORY_FENCE_ACQUIRE __machine_r_barrier ()
-    #define ECB_MEMORY_FENCE_RELEASE __machine_w_barrier ()
+    #define ECB_MEMORY_FENCE __machine_rw_barrier ()
+    #define ECB_MEMORY_FENCE_ACQUIRE __machine_acq_barrier ()
+    #define ECB_MEMORY_FENCE_RELEASE __machine_rel_barrier ()
+    #define ECB_MEMORY_FENCE_RELAXED __compiler_barrier ()
   #elif __xlC__
     #define ECB_MEMORY_FENCE __sync ()
   #endif
@@ -764,15 +894,9 @@ struct signalfd_siginfo
   /* we assume that these memory fences work on all variables/all memory accesses, */
   /* not just C11 atomics and atomic accesses */
   #include <stdatomic.h>
-  /* Unfortunately, neither gcc 4.7 nor clang 3.1 generate any instructions for */
-  /* any fence other than seq_cst, which isn't very efficient for us. */
-  /* Why that is, we don't know - either the C11 memory model is quite useless */
-  /* for most usages, or gcc and clang have a bug */
-  /* I *currently* lean towards the latter, and inefficiently implement */
-  /* all three of ecb's fences as a seq_cst fence */
-  /* Update, gcc-4.8 generates mfence for all c++ fences, but nothing */
-  /* for all __atomic_thread_fence's except seq_cst */
   #define ECB_MEMORY_FENCE atomic_thread_fence (memory_order_seq_cst)
+  #define ECB_MEMORY_FENCE_ACQUIRE atomic_thread_fence (memory_order_acquire)
+  #define ECB_MEMORY_FENCE_RELEASE atomic_thread_fence (memory_order_release)
 #endif
 #endif
 
@@ -802,6 +926,10 @@ struct signalfd_siginfo
   #define ECB_MEMORY_FENCE_RELEASE ECB_MEMORY_FENCE
 #endif
 
+#if !defined ECB_MEMORY_FENCE_RELAXED && defined ECB_MEMORY_FENCE
+  #define ECB_MEMORY_FENCE_RELAXED ECB_MEMORY_FENCE /* very heavy-handed */
+#endif
+
 /*****************************************************************************/
 
 #if ECB_CPP
@@ -1093,6 +1221,44 @@ ecb_inline ecb_const uint32_t ecb_rotr32 (uint32_t x, unsigned int count) { retu
 ecb_inline ecb_const uint64_t ecb_rotl64 (uint64_t x, unsigned int count) { return (x >> (64 - count)) | (x << count); }
 ecb_inline ecb_const uint64_t ecb_rotr64 (uint64_t x, unsigned int count) { return (x << (64 - count)) | (x >> count); }
 
+#if ECB_CPP
+
+inline uint8_t  ecb_ctz (uint8_t  v) { return ecb_ctz32 (v); }
+inline uint16_t ecb_ctz (uint16_t v) { return ecb_ctz32 (v); }
+inline uint32_t ecb_ctz (uint32_t v) { return ecb_ctz32 (v); }
+inline uint64_t ecb_ctz (uint64_t v) { return ecb_ctz64 (v); }
+
+inline bool ecb_is_pot (uint8_t  v) { return ecb_is_pot32 (v); }
+inline bool ecb_is_pot (uint16_t v) { return ecb_is_pot32 (v); }
+inline bool ecb_is_pot (uint32_t v) { return ecb_is_pot32 (v); }
+inline bool ecb_is_pot (uint64_t v) { return ecb_is_pot64 (v); }
+
+inline int ecb_ld (uint8_t  v) { return ecb_ld32 (v); }
+inline int ecb_ld (uint16_t v) { return ecb_ld32 (v); }
+inline int ecb_ld (uint32_t v) { return ecb_ld32 (v); }
+inline int ecb_ld (uint64_t v) { return ecb_ld64 (v); }
+
+inline int ecb_popcount (uint8_t  v) { return ecb_popcount32 (v); }
+inline int ecb_popcount (uint16_t v) { return ecb_popcount32 (v); }
+inline int ecb_popcount (uint32_t v) { return ecb_popcount32 (v); }
+inline int ecb_popcount (uint64_t v) { return ecb_popcount64 (v); }
+
+inline uint8_t  ecb_bitrev (uint8_t  v) { return ecb_bitrev8  (v); }
+inline uint16_t ecb_bitrev (uint16_t v) { return ecb_bitrev16 (v); }
+inline uint32_t ecb_bitrev (uint32_t v) { return ecb_bitrev32 (v); }
+
+inline uint8_t  ecb_rotl (uint8_t  v, unsigned int count) { return ecb_rotl8  (v, count); }
+inline uint16_t ecb_rotl (uint16_t v, unsigned int count) { return ecb_rotl16 (v, count); }
+inline uint32_t ecb_rotl (uint32_t v, unsigned int count) { return ecb_rotl32 (v, count); }
+inline uint64_t ecb_rotl (uint64_t v, unsigned int count) { return ecb_rotl64 (v, count); }
+
+inline uint8_t  ecb_rotr (uint8_t  v, unsigned int count) { return ecb_rotr8  (v, count); }
+inline uint16_t ecb_rotr (uint16_t v, unsigned int count) { return ecb_rotr16 (v, count); }
+inline uint32_t ecb_rotr (uint32_t v, unsigned int count) { return ecb_rotr32 (v, count); }
+inline uint64_t ecb_rotr (uint64_t v, unsigned int count) { return ecb_rotr64 (v, count); }
+
+#endif
+
 #if ECB_GCC_VERSION(4,3) || (ECB_CLANG_BUILTIN(__builtin_bswap32) && ECB_CLANG_BUILTIN(__builtin_bswap64))
   #if ECB_GCC_VERSION(4,8) || ECB_CLANG_BUILTIN(__builtin_bswap16)
   #define ecb_bswap16(x) __builtin_bswap16 (x)
@@ -1173,6 +1339,78 @@ ecb_inline ecb_const ecb_bool ecb_big_endian (void) { return ecb_byteorder_he
 ecb_inline ecb_const ecb_bool ecb_little_endian (void);
 ecb_inline ecb_const ecb_bool ecb_little_endian (void) { return ecb_byteorder_helper () == 0x44332211; }
 
+/*****************************************************************************/
+/* unaligned load/store */
+
+ecb_inline uint_fast16_t ecb_be_u16_to_host (uint_fast16_t v) { return ecb_little_endian () ? ecb_bswap16 (v) : v; }
+ecb_inline uint_fast32_t ecb_be_u32_to_host (uint_fast32_t v) { return ecb_little_endian () ? ecb_bswap32 (v) : v; }
+ecb_inline uint_fast64_t ecb_be_u64_to_host (uint_fast64_t v) { return ecb_little_endian () ? ecb_bswap64 (v) : v; }
+
+ecb_inline uint_fast16_t ecb_le_u16_to_host (uint_fast16_t v) { return ecb_big_endian () ? ecb_bswap16 (v) : v; }
+ecb_inline uint_fast32_t ecb_le_u32_to_host (uint_fast32_t v) { return ecb_big_endian () ? ecb_bswap32 (v) : v; }
+ecb_inline uint_fast64_t ecb_le_u64_to_host (uint_fast64_t v) { return ecb_big_endian () ? ecb_bswap64 (v) : v; }
+
+ecb_inline uint_fast16_t ecb_peek_u16_u (const void *ptr) { uint16_t v; memcpy (&v, ptr, sizeof (v)); return v; }
+ecb_inline uint_fast32_t ecb_peek_u32_u (const void *ptr) { uint32_t v; memcpy (&v, ptr, sizeof (v)); return v; }
+ecb_inline uint_fast64_t ecb_peek_u64_u (const void *ptr) { uint64_t v; memcpy (&v, ptr, sizeof (v)); return v; }
+
+ecb_inline uint_fast16_t ecb_peek_be_u16_u (const void *ptr) { return ecb_be_u16_to_host (ecb_peek_u16_u (ptr)); }
+ecb_inline uint_fast32_t ecb_peek_be_u32_u (const void *ptr) { return ecb_be_u32_to_host (ecb_peek_u32_u (ptr)); }
+ecb_inline uint_fast64_t ecb_peek_be_u64_u (const void *ptr) { return ecb_be_u64_to_host (ecb_peek_u64_u (ptr)); }
+
+ecb_inline uint_fast16_t ecb_peek_le_u16_u (const void *ptr) { return ecb_le_u16_to_host (ecb_peek_u16_u (ptr)); }
+ecb_inline uint_fast32_t ecb_peek_le_u32_u (const void *ptr) { return ecb_le_u32_to_host (ecb_peek_u32_u (ptr)); }
+ecb_inline uint_fast64_t ecb_peek_le_u64_u (const void *ptr) { return ecb_le_u64_to_host (ecb_peek_u64_u (ptr)); }
+
+ecb_inline uint_fast16_t ecb_host_to_be_u16 (uint_fast16_t v) { return ecb_little_endian () ? ecb_bswap16 (v) : v; }
+ecb_inline uint_fast32_t ecb_host_to_be_u32 (uint_fast32_t v) { return ecb_little_endian () ? ecb_bswap32 (v) : v; }
+ecb_inline uint_fast64_t ecb_host_to_be_u64 (uint_fast64_t v) { return ecb_little_endian () ? ecb_bswap64 (v) : v; }
+
+ecb_inline uint_fast16_t ecb_host_to_le_u16 (uint_fast16_t v) { return ecb_big_endian () ? ecb_bswap16 (v) : v; }
+ecb_inline uint_fast32_t ecb_host_to_le_u32 (uint_fast32_t v) { return ecb_big_endian () ? ecb_bswap32 (v) : v; }
+ecb_inline uint_fast64_t ecb_host_to_le_u64 (uint_fast64_t v) { return ecb_big_endian () ? ecb_bswap64 (v) : v; }
+
+ecb_inline void ecb_poke_u16_u (void *ptr, uint16_t v) { memcpy (ptr, &v, sizeof (v)); }
+ecb_inline void ecb_poke_u32_u (void *ptr, uint32_t v) { memcpy (ptr, &v, sizeof (v)); }
+ecb_inline void ecb_poke_u64_u (void *ptr, uint64_t v) { memcpy (ptr, &v, sizeof (v)); }
+
+ecb_inline void ecb_poke_be_u16_u (void *ptr, uint_fast16_t v) { ecb_poke_u16_u (ptr, ecb_host_to_be_u16 (v)); }
+ecb_inline void ecb_poke_be_u32_u (void *ptr, uint_fast32_t v) { ecb_poke_u32_u (ptr, ecb_host_to_be_u32 (v)); }
+ecb_inline void ecb_poke_be_u64_u (void *ptr, uint_fast64_t v) { ecb_poke_u64_u (ptr, ecb_host_to_be_u64 (v)); }
+
+ecb_inline void ecb_poke_le_u16_u (void *ptr, uint_fast16_t v) { ecb_poke_u16_u (ptr, ecb_host_to_le_u16 (v)); }
+ecb_inline void ecb_poke_le_u32_u (void *ptr, uint_fast32_t v) { ecb_poke_u32_u (ptr, ecb_host_to_le_u32 (v)); }
+ecb_inline void ecb_poke_le_u64_u (void *ptr, uint_fast64_t v) { ecb_poke_u64_u (ptr, ecb_host_to_le_u64 (v)); }
+
+#if ECB_CPP
+
+inline uint8_t  ecb_bswap (uint8_t  v) { return v; }
+inline uint16_t ecb_bswap (uint16_t v) { return ecb_bswap16 (v); }
+inline uint32_t ecb_bswap (uint32_t v) { return ecb_bswap32 (v); }
+inline uint64_t ecb_bswap (uint64_t v) { return ecb_bswap64 (v); }
+
+template<typename T> inline T ecb_be_to_host (T v) { return ecb_little_endian () ? ecb_bswap (v) : v; }
+template<typename T> inline T ecb_le_to_host (T v) { return ecb_big_endian    () ? ecb_bswap (v) : v; }
+template<typename T> inline T ecb_peek       (const void *ptr) { return *(const T *)ptr; }
+template<typename T> inline T ecb_peek_be    (const void *ptr) { return ecb_be_to_host (ecb_peek  <T> (ptr)); }
+template<typename T> inline T ecb_peek_le    (const void *ptr) { return ecb_le_to_host (ecb_peek  <T> (ptr)); }
+template<typename T> inline T ecb_peek_u     (const void *ptr) { T v; memcpy (&v, ptr, sizeof (v)); return v; }
+template<typename T> inline T ecb_peek_be_u  (const void *ptr) { return ecb_be_to_host (ecb_peek_u<T> (ptr)); }
+template<typename T> inline T ecb_peek_le_u  (const void *ptr) { return ecb_le_to_host (ecb_peek_u<T> (ptr)); }
+
+template<typename T> inline T ecb_host_to_be (T v) { return ecb_little_endian () ? ecb_bswap (v) : v; }
+template<typename T> inline T ecb_host_to_le (T v) { return ecb_big_endian    () ? ecb_bswap (v) : v; }
+template<typename T> inline void ecb_poke      (void *ptr, T v) { *(T *)ptr = v; }
+template<typename T> inline void ecb_poke_be   (void *ptr, T v) { return ecb_poke  <T> (ptr, ecb_host_to_be (v)); }
+template<typename T> inline void ecb_poke_le   (void *ptr, T v) { return ecb_poke  <T> (ptr, ecb_host_to_le (v)); }
+template<typename T> inline void ecb_poke_u    (void *ptr, T v) { memcpy (ptr, &v, sizeof (v)); }
+template<typename T> inline void ecb_poke_be_u (void *ptr, T v) { return ecb_poke_u<T> (ptr, ecb_host_to_be (v)); }
+template<typename T> inline void ecb_poke_le_u (void *ptr, T v) { return ecb_poke_u<T> (ptr, ecb_host_to_le (v)); }
+
+#endif
+
+/*****************************************************************************/
+
 #if ECB_GCC_VERSION(3,0) || ECB_C99
   #define ecb_mod(m,n) ((m) % (n) + ((m) % (n) < 0 ? (n) : 0))
 #else
@@ -1206,6 +1444,8 @@ ecb_inline ecb_const ecb_bool ecb_little_endian (void) { return ecb_byteorder_he
   #define ecb_array_length(name) (sizeof (name) / sizeof (name [0]))
 #endif
 
+/*****************************************************************************/
+
 ecb_function_ ecb_const uint32_t ecb_binary16_to_binary32 (uint32_t x);
 ecb_function_ ecb_const uint32_t
 ecb_binary16_to_binary32 (uint32_t x)
@@ -1323,7 +1563,6 @@ ecb_binary32_to_binary16 (uint32_t x)
     || (defined __arm__ && (defined __ARM_EABI__ || defined __EABI__ || defined __VFP_FP__ || defined _WIN32_WCE || defined __ANDROID__)) \
     || defined __aarch64__
   #define ECB_STDFP 1
-  #include <string.h> /* for memcpy */
 #else
   #define ECB_STDFP 0
 #endif
@@ -1518,7 +1757,7 @@ ecb_binary32_to_binary16 (uint32_t x)
 #if ECB_MEMORY_FENCE_NEEDS_PTHREADS
 /* if your architecture doesn't need memory fences, e.g. because it is
  * single-cpu/core, or if you use libev in a project that doesn't use libev
- * from multiple threads, then you can define ECB_AVOID_PTHREADS when compiling
+ * from multiple threads, then you can define ECB_NO_THREADS when compiling
  * libev, in which cases the memory fences become nops.
  * alternatively, you can remove this #error and link against libpthread,
  * which will then provide the memory fences.
@@ -1532,18 +1771,80 @@ ecb_binary32_to_binary16 (uint32_t x)
 # define ECB_MEMORY_FENCE_RELEASE ECB_MEMORY_FENCE
 #endif
 
-#define expect_false(cond) ecb_expect_false (cond)
-#define expect_true(cond)  ecb_expect_true  (cond)
-#define noinline           ecb_noinline
-
 #define inline_size        ecb_inline
 
 #if EV_FEATURE_CODE
 # define inline_speed      ecb_inline
 #else
-# define inline_speed      noinline static
+# define inline_speed      ecb_noinline static
+#endif
+
+/*****************************************************************************/
+/* raw syscall wrappers */
+
+#if EV_NEED_SYSCALL
+
+#include <sys/syscall.h>
+
+/*
+ * define some syscall wrappers for common architectures
+ * this is mostly for nice looks during debugging, not performance.
+ * our syscalls return < 0, not == -1, on error. which is good
+ * enough for linux aio.
+ * TODO: arm is also common nowadays, maybe even mips and x86
+ * TODO: after implementing this, it suddenly looks like overkill, but its hard to remove...
+ */
+#if __GNUC__ && __linux && ECB_AMD64 && !EV_FEATURE_CODE
+  /* the costly errno access probably kills this for size optimisation */
+
+  #define ev_syscall(nr,narg,arg1,arg2,arg3,arg4,arg5,arg6)             \
+    ({                                                                  \
+        long res;                                                       \
+        register unsigned long r6 __asm__ ("r9" );                      \
+        register unsigned long r5 __asm__ ("r8" );                      \
+        register unsigned long r4 __asm__ ("r10");                      \
+        register unsigned long r3 __asm__ ("rdx");                      \
+        register unsigned long r2 __asm__ ("rsi");                      \
+        register unsigned long r1 __asm__ ("rdi");                      \
+        if (narg >= 6) r6 = (unsigned long)(arg6);                      \
+        if (narg >= 5) r5 = (unsigned long)(arg5);                      \
+        if (narg >= 4) r4 = (unsigned long)(arg4);                      \
+        if (narg >= 3) r3 = (unsigned long)(arg3);                      \
+        if (narg >= 2) r2 = (unsigned long)(arg2);                      \
+        if (narg >= 1) r1 = (unsigned long)(arg1);                      \
+        __asm__ __volatile__ (                                          \
+          "syscall\n\t"                                                 \
+          : "=a" (res)                                                  \
+          : "0" (nr), "r" (r1), "r" (r2), "r" (r3), "r" (r4), "r" (r5)  \
+          : "cc", "r11", "cx", "memory");                               \
+        errno = -res;                                                   \
+        res;                                                            \
+    })
+
+#endif
+
+#ifdef ev_syscall
+  #define ev_syscall0(nr)                               ev_syscall (nr, 0,    0,    0,    0,    0,    0,   0)
+  #define ev_syscall1(nr,arg1)                          ev_syscall (nr, 1, arg1,    0,    0,    0,    0,   0)
+  #define ev_syscall2(nr,arg1,arg2)                     ev_syscall (nr, 2, arg1, arg2,    0,    0,    0,   0)
+  #define ev_syscall3(nr,arg1,arg2,arg3)                ev_syscall (nr, 3, arg1, arg2, arg3,    0,    0,   0)
+  #define ev_syscall4(nr,arg1,arg2,arg3,arg4)           ev_syscall (nr, 3, arg1, arg2, arg3, arg4,    0,   0)
+  #define ev_syscall5(nr,arg1,arg2,arg3,arg4,arg5)      ev_syscall (nr, 5, arg1, arg2, arg3, arg4, arg5,   0)
+  #define ev_syscall6(nr,arg1,arg2,arg3,arg4,arg5,arg6) ev_syscall (nr, 6, arg1, arg2, arg3, arg4, arg5,arg6)
+#else
+  #define ev_syscall0(nr)                               syscall (nr)
+  #define ev_syscall1(nr,arg1)                          syscall (nr, arg1)
+  #define ev_syscall2(nr,arg1,arg2)                     syscall (nr, arg1, arg2)
+  #define ev_syscall3(nr,arg1,arg2,arg3)                syscall (nr, arg1, arg2, arg3)
+  #define ev_syscall4(nr,arg1,arg2,arg3,arg4)           syscall (nr, arg1, arg2, arg3, arg4)
+  #define ev_syscall5(nr,arg1,arg2,arg3,arg4,arg5)      syscall (nr, arg1, arg2, arg3, arg4, arg5)
+  #define ev_syscall6(nr,arg1,arg2,arg3,arg4,arg5,arg6) syscall (nr, arg1, arg2, arg3, arg4, arg5,arg6)
 #endif
 
+#endif
+
+/*****************************************************************************/
+
 #define NUMPRI (EV_MAXPRI - EV_MINPRI + 1)
 
 #if EV_MINPRI == EV_MAXPRI
@@ -1552,8 +1853,7 @@ ecb_binary32_to_binary16 (uint32_t x)
 # define ABSPRI(w) (((W)w)->priority - EV_MINPRI)
 #endif
 
-#define EMPTY /* required for microsofts broken pseudo-c compiler */
-#define EMPTY2(a,b) /* used to suppress some warnings */
+#define EMPTY /* required for microsofts broken pseudo-c compiler */
 
 typedef ev_watcher *W;
 typedef ev_watcher_list *WL;
@@ -1590,6 +1890,10 @@ static EV_ATOMIC_T have_cheap_timer = 0;
 
 /*****************************************************************************/
 
+#if EV_USE_LINUXAIO
+# include <linux/aio_abi.h> /* probably only needed for aio_context_t */
+#endif
+
 /* define a suitable floor function (only used by periodics atm) */
 
 #if EV_USE_FLOOR
@@ -1600,7 +1904,7 @@ static EV_ATOMIC_T have_cheap_timer = 0;
 #include <float.h>
 
 /* a floor() replacement function, should be independent of ev_tstamp type */
-noinline
+ecb_noinline
 static ev_tstamp
 ev_floor (ev_tstamp v)
 {
@@ -1611,26 +1915,26 @@ ev_floor (ev_tstamp v)
   const ev_tstamp shift = sizeof (unsigned long) >= 8 ? 18446744073709551616. : 4294967296.;
 #endif
 
-  /* argument too large for an unsigned long? */
-  if (expect_false (v >= shift))
+  /* special treatment for negative arguments */
+  if (ecb_expect_false (v < 0.))
+    {
+      ev_tstamp f = -ev_floor (-v);
+
+      return f - (f == v ? 0 : 1);
+    }
+
+  /* argument too large for an unsigned long? then reduce it */
+  if (ecb_expect_false (v >= shift))
    {
      ev_tstamp f;
 
      if (v == v - 1.)
-        return v; /* very large number */
+        return v; /* very large numbers are assumed to be integer */
 
      f = shift * ev_floor (v * (1. / shift));
      return f + ev_floor (v - f);
    }
 
-  /* special treatment for negative args? */
-  if (expect_false (v < 0.))
-    {
-      ev_tstamp f = -ev_floor (-v);
-
-      return f - (f == v ? 0 : 1);
-    }
-
   /* fits into an unsigned long */
   return (unsigned long)v;
 }
@@ -1643,7 +1947,7 @@ ev_floor (ev_tstamp v)
 # include <sys/utsname.h>
 #endif
 
-noinline ecb_cold
+ecb_noinline ecb_cold
 static unsigned int
 ev_linux_version (void)
 {
@@ -1683,7 +1987,7 @@ ev_linux_version (void)
 /*****************************************************************************/
 
 #if EV_AVOID_STDIO
-noinline ecb_cold
+ecb_noinline ecb_cold
 static void
 ev_printerr (const char *msg)
 {
@@ -1700,7 +2004,7 @@ ev_set_syserr_cb (void (*cb)(const char *msg) EV_NOEXCEPT) EV_NOEXCEPT
   syserr_cb = cb;
 }
 
-noinline ecb_cold
+ecb_noinline ecb_cold
 static void
 ev_syserr (const char *msg)
 {
@@ -1781,8 +2085,8 @@ typedef struct
   WL head;
   unsigned char events; /* the events watched for */
   unsigned char reify;  /* flag set when this ANFD needs reification (EV_ANFD_REIFY, EV__IOFDSET) */
-  unsigned char emask;  /* the epoll backend stores the actual kernel mask in here */
-  unsigned char unused;
+  unsigned char emask;  /* some backends store the actual kernel mask in here */
+  unsigned char eflags; /* flags field for use by backends */
 #if EV_USE_EPOLL
   unsigned int egen;    /* generation counter to counter epoll bugs */
 #endif
@@ -1842,10 +2146,11 @@ typedef struct
   #include "ev_wrap.h"
 
   static struct ev_loop default_loop_struct;
-  static struct ev_loop *ev_default_loop_ptr = 0;
+  EV_API_DECL struct ev_loop *ev_default_loop_ptr = 0; /* needs to be initialised to make it a definition despite extern */
+
 #else
 
-  EV_API_DECL ev_tstamp ev_rt_now = 0; /* needs to be initialised to make it a definition despite extern */
+  EV_API_DECL ev_tstamp ev_rt_now = EV_TS_CONST (0.); /* needs to be initialised to make it a definition despite extern */
   #define VAR(name,decl) static decl;
   #include "ev_vars.h"
   #undef VAR
@@ -1855,8 +2160,8 @@ typedef struct
 #endif
 
 #if EV_FEATURE_API
-# define EV_RELEASE_CB if (expect_false (release_cb)) release_cb (EV_A)
-# define EV_ACQUIRE_CB if (expect_false (acquire_cb)) acquire_cb (EV_A)
+# define EV_RELEASE_CB if (ecb_expect_false (release_cb)) release_cb (EV_A)
+# define EV_ACQUIRE_CB if (ecb_expect_false (acquire_cb)) acquire_cb (EV_A)
 # define EV_INVOKE_PENDING invoke_cb (EV_A)
 #else
 # define EV_RELEASE_CB (void)0
@@ -1873,17 +2178,19 @@
 ev_tstamp
 ev_time (void) EV_NOEXCEPT
 {
 #if EV_USE_REALTIME
-  if (expect_true (have_realtime))
+  if (ecb_expect_true (have_realtime))
    {
      struct timespec ts;
      clock_gettime (CLOCK_REALTIME, &ts);
-      return ts.tv_sec + ts.tv_nsec * 1e-9;
+      return EV_TS_GET (ts);
    }
 #endif
 
-  struct timeval tv;
-  gettimeofday (&tv, 0);
-  return tv.tv_sec + tv.tv_usec * 1e-6;
+  {
+    struct timeval tv;
+    gettimeofday (&tv, 0);
+    return EV_TV_GET (tv);
+  }
 }
 #endif
 
@@ -1891,11 +2198,11 @@ inline_size ev_tstamp
 get_clock (void)
 {
 #if EV_USE_MONOTONIC
-  if (expect_true (have_monotonic))
+  if (ecb_expect_true (have_monotonic))
    {
      struct timespec ts;
      clock_gettime (monotinic_clock_id, &ts);
-      return ts.tv_sec + ts.tv_nsec * 1e-9;
+      return ((ev_tstamp)ts.tv_sec) + ts.tv_nsec * 1e-9;
    }
 #endif
 
@@ -1913,7 +2220,7 @@ ev_now (EV_P) EV_NOEXCEPT
 void
 ev_sleep (ev_tstamp delay) EV_NOEXCEPT
 {
-  if (delay > 0.)
+  if (delay > EV_TS_CONST (0.))
    {
 #if EV_USE_NANOSLEEP
      struct timespec ts;
@@ -1923,7 +2230,7 @@ ev_sleep (ev_tstamp delay) EV_NOEXCEPT
 #elif defined _WIN32
      /* maybe this should round up, as ms is very low resolution */
      /* compared to select (µs) or nanosleep (ns) */
-      Sleep ((unsigned long)(delay * 1e3));
+      Sleep ((unsigned long)(EV_TS_TO_MSEC (delay)));
 #else
      struct timeval tv;
 
@@ -1963,7 +2270,7 @@ array_nextsize (int elem, int cur, int cnt)
   return ncur;
 }
 
-noinline ecb_cold
+ecb_noinline ecb_cold
 static void *
 array_realloc (int elem, void *base, int *cur, int cnt)
 {
@@ -1971,16 +2278,18 @@ array_realloc (int elem, void *base, int *cur, int cnt)
   return ev_realloc (base, elem * *cur);
 }
 
-#define array_init_zero(base,count) \
-  memset ((void *)(base), 0, sizeof (*(base)) * (count))
+#define array_needsize_noinit(base,offset,count)
+
+#define array_needsize_zerofill(base,offset,count) \
+  memset ((void *)(base + offset), 0, sizeof (*(base)) * (count))
 
 #define array_needsize(type,base,cur,cnt,init) \
-  if (expect_false ((cnt) > (cur))) \
+  if (ecb_expect_false ((cnt) > (cur))) \
    { \
      ecb_unused int ocur_ = (cur); \
      (base) = (type *)array_realloc \
         (sizeof (type), (base), &(cur), (cnt)); \
-      init ((base) + (ocur_), (cur) - ocur_); \
+      init ((base), ocur_, ((cur) - ocur_)); \
    }
 
 #if 0
@@ -1999,25 +2308,25 @@ array_realloc (int elem, void *base, int *cur, int cnt)
 /*****************************************************************************/
 
 /* dummy callback for pending events */
-noinline
+ecb_noinline
 static void
 pendingcb (EV_P_ ev_prepare *w, int revents)
 {
 }
 
-noinline
+ecb_noinline
 void
 ev_feed_event (EV_P_ void *w, int revents) EV_NOEXCEPT
 {
   W w_ = (W)w;
   int pri = ABSPRI (w_);
 
-  if (expect_false (w_->pending))
+  if (ecb_expect_false (w_->pending))
    pendings [pri][w_->pending - 1].events |= revents;
  else
    {
      w_->pending = ++pendingcnt [pri];
-      array_needsize (ANPENDING, pendings [pri], pendingmax [pri], w_->pending, EMPTY2);
+      array_needsize (ANPENDING, pendings [pri], pendingmax [pri], w_->pending, array_needsize_noinit);
      pendings [pri][w_->pending - 1].w      = w_;
      pendings [pri][w_->pending - 1].events = revents;
    }
@@ -2028,7 +2337,7 @@ ev_feed_event (EV_P_ void *w, int revents) EV_NOEXCEPT
 inline_speed void
 feed_reverse (EV_P_ W w)
 {
-  array_needsize (W, rfeeds, rfeedmax, rfeedcnt + 1, EMPTY2);
+  array_needsize (W, rfeeds, rfeedmax, rfeedcnt + 1, array_needsize_noinit);
  rfeeds [rfeedcnt++] = w;
 }
 
@@ -2073,7 +2382,7 @@ fd_event (EV_P_ int fd, int revents)
 {
  ANFD *anfd = anfds + fd;
 
-  if (expect_true (!anfd->reify))
+  if (ecb_expect_true (!anfd->reify))
    fd_event_nocheck (EV_A_ fd, revents);
 }
 
@@ -2091,8 +2400,20 @@ fd_reify (EV_P)
 {
  int i;
 
+  /* most backends do not modify the fdchanges list in backend_modfiy.
+   * except io_uring, which has fixed-size buffers which might force us
+   * to handle events in backend_modify, causing fdchanges to be amended,
+   * which could result in an endless loop.
+   * to avoid this, we do not dynamically handle fds that were added
+   * during fd_reify. that means that for those backends, fdchangecnt
+   * might be non-zero during poll, which must cause them to not block.
+   * to not put too much of a burden on other backends, this detail
+   * needs to be handled in the backend.
+   */
+  int changecnt = fdchangecnt;
+
 #if EV_SELECT_IS_WINSOCKET || EV_USE_IOCP
-  for (i = 0; i < fdchangecnt; ++i)
+  for (i = 0; i < changecnt; ++i)
    {
      int fd = fdchanges [i];
      ANFD *anfd = anfds + fd;
@@ -2116,7 +2437,7 @@ fd_reify (EV_P)
    }
 #endif
 
-  for (i = 0; i < fdchangecnt; ++i)
+  for (i = 0; i < changecnt; ++i)
    {
      int fd = fdchanges [i];
      ANFD *anfd = anfds + fd;
@@ -2125,9 +2446,9 @@ fd_reify (EV_P)
      unsigned char o_events = anfd->events;
      unsigned char o_reify  = anfd->reify;
 
-      anfd->reify  = 0;
+      anfd->reify = 0;
 
-      /*if (expect_true (o_reify & EV_ANFD_REIFY)) probably a deoptimisation */
+      /*if (ecb_expect_true (o_reify & EV_ANFD_REIFY)) probably a deoptimisation */
       {
         anfd->events = 0;
 
@@ -2142,7 +2463,14 @@ fd_reify (EV_P)
        backend_modify (EV_A_ fd, o_events, anfd->events);
      }
 
-  fdchangecnt = 0;
+  /* normally, fdchangecnt hasn't changed. if it has, then new fds have been added.
+   * this is a rare case (see beginning comment in this function), so we copy them to the
+   * front and hope the backend handles this case.
+   */
+  if (ecb_expect_false (fdchangecnt != changecnt))
+    memmove (fdchanges, fdchanges + changecnt, (fdchangecnt - changecnt) * sizeof (*fdchanges));
+
+  fdchangecnt -= changecnt;
 }
 
 /* something about the given fd changed */
@@ -2151,12 +2479,12 @@ void
 fd_change (EV_P_ int fd, int flags)
 {
  unsigned char reify = anfds [fd].reify;
-  anfds [fd].reify |= flags;
+  anfds [fd].reify = reify | flags;
 
-  if (expect_true (!reify))
+  if (ecb_expect_true (!reify))
    {
      ++fdchangecnt;
-      array_needsize (int, fdchanges, fdchangemax, fdchangecnt, EMPTY2);
+      array_needsize (int, fdchanges, fdchangemax, fdchangecnt, array_needsize_noinit);
      fdchanges [fdchangecnt - 1] = fd;
    }
 }
@@ -2186,7 +2514,7 @@ fd_valid (int fd)
 }
 
 /* called on EBADF to verify fds */
-noinline ecb_cold
+ecb_noinline ecb_cold
 static void
 fd_ebadf (EV_P)
 {
@@ -2199,7 +2527,7 @@ fd_ebadf (EV_P)
 }
 
 /* called on ENOMEM in select/poll to kill some fds and retry */
-noinline ecb_cold
+ecb_noinline ecb_cold
 static void
 fd_enomem (EV_P)
 {
@@ -2214,7 +2542,7 @@ fd_enomem (EV_P)
 }
 
 /* usually called after fork if backend needs to re-arm all fds from scratch */
-noinline
+ecb_noinline
 static void
 fd_rearm_all (EV_P)
 {
@@ -2278,19 +2606,19 @@ downheap (ANHE *heap, int N, int k)
      ANHE *pos = heap + DHEAP * (k - HEAP0) + HEAP0 + 1;
 
      /* find minimum child */
-      if (expect_true (pos + DHEAP - 1 < E))
+      if (ecb_expect_true (pos + DHEAP - 1 < E))
        {
          /* fast path */                               (minpos = pos + 0), (minat = ANHE_at (*minpos));
-          if (               ANHE_at (pos [1]) < minat) (minpos = pos + 1), (minat = ANHE_at (*minpos));
-          if (               ANHE_at (pos [2]) < minat) (minpos = pos + 2), (minat = ANHE_at (*minpos));
-          if (               ANHE_at (pos [3]) < minat) (minpos = pos + 3), (minat = ANHE_at (*minpos));
+          if (               minat > ANHE_at (pos [1])) (minpos = pos + 1), (minat = ANHE_at (*minpos));
+          if (               minat > ANHE_at (pos [2])) (minpos = pos + 2), (minat = ANHE_at (*minpos));
+          if (               minat > ANHE_at (pos [3])) (minpos = pos + 3), (minat = ANHE_at (*minpos));
        }
      else if (pos < E)
        {
          /* slow path */                               (minpos = pos + 0), (minat = ANHE_at (*minpos));
-          if (pos + 1 < E && ANHE_at (pos [1]) < minat) (minpos = pos + 1), (minat = ANHE_at (*minpos));
-          if (pos + 2 < E && ANHE_at (pos [2]) < minat) (minpos = pos + 2), (minat = ANHE_at (*minpos));
-          if (pos + 3 < E && ANHE_at (pos [3]) < minat) (minpos = pos + 3), (minat = ANHE_at (*minpos));
+          if (pos + 1 < E && minat > ANHE_at (pos [1])) (minpos = pos + 1), (minat = ANHE_at (*minpos));
+          if (pos + 2 < E && minat > ANHE_at (pos [2])) (minpos = pos + 2), (minat = ANHE_at (*minpos));
+          if (pos + 3 < E && minat > ANHE_at (pos [3])) (minpos = pos + 3), (minat = ANHE_at (*minpos));
        }
      else
        break;
@@ -2308,7 +2636,7 @@ downheap (ANHE *heap, int N, int k)
  ev_active (ANHE_w (he)) = k;
 }
 
-#else /* 4HEAP */
+#else /* not 4HEAP */
 
 #define HEAP0 1
 #define HPARENT(k) ((k) >> 1)
@@ -2390,7 +2718,7 @@ reheap (ANHE *heap, int N)
 
 /*****************************************************************************/
 
-/* associate signal watchers to a signal signal */
+/* associate signal watchers to a signal */
 typedef struct
 {
  EV_ATOMIC_T pending;
@@ -2406,7 +2734,7 @@ static ANSIG signals [EV_NSIG - 1];
 
 #if EV_SIGNAL_ENABLE || EV_ASYNC_ENABLE
 
-noinline ecb_cold
+ecb_noinline ecb_cold
 static void
 evpipe_init (EV_P)
 {
@@ -2457,7 +2785,7 @@ evpipe_write (EV_P_ EV_ATOMIC_T *flag)
 {
  ECB_MEMORY_FENCE; /* push out the write before this function was called, acquire flag */
 
-  if (expect_true (*flag))
+  if (ecb_expect_true (*flag))
    return;
 
  *flag = 1;
@@ -2544,7 +2872,7 @@ pipecb (EV_P_ ev_io *iow, int revents)
      ECB_MEMORY_FENCE;
 
      for (i = EV_NSIG - 1; i--; )
-        if (expect_false (signals [i].pending))
+        if (ecb_expect_false (signals [i].pending))
          ev_feed_signal_event (EV_A_ i + 1);
    }
 #endif
@@ -2595,13 +2923,13 @@ ev_sighandler (int signum)
  ev_feed_signal (signum);
 }
 
-noinline
+ecb_noinline
 void
 ev_feed_signal_event (EV_P_ int signum) EV_NOEXCEPT
 {
  WL w;
 
-  if (expect_false (signum <= 0 || signum >= EV_NSIG))
+  if (ecb_expect_false (signum <= 0 || signum >= EV_NSIG))
    return;
 
  --signum;
@@ -2610,7 +2938,7 @@ ev_feed_signal_event (EV_P_ int signum) EV_NOEXCEPT
 
  /* it is permissible to try to feed a signal to the wrong loop */
  /* or, likely more useful, feeding a signal nobody is waiting for */
-  if (expect_false (signals [signum].loop != EV_A))
+  if (ecb_expect_false (signals [signum].loop != EV_A))
    return;
 #endif
@@ -2704,6 +3032,57 @@ childcb (EV_P_ ev_signal *sw, int revents)
 
 /*****************************************************************************/
 
+#if EV_USE_TIMERFD
+
+static void periodics_reschedule (EV_P);
+
+static void
+timerfdcb (EV_P_ ev_io *iow, int revents)
+{
+  struct itimerspec its = { 0 };
+
+  its.it_value.tv_sec = ev_rt_now + (int)MAX_BLOCKTIME2;
+  timerfd_settime (timerfd, TFD_TIMER_ABSTIME | TFD_TIMER_CANCEL_ON_SET, &its, 0);
+
+  ev_rt_now = ev_time ();
+  /* periodics_reschedule only needs ev_rt_now */
+  /* but maybe in the future we want the full treatment. */
+  /*
+  now_floor = EV_TS_CONST (0.);
+  time_update (EV_A_ EV_TSTAMP_HUGE);
+  */
+#if EV_PERIODIC_ENABLE
+  periodics_reschedule (EV_A);
+#endif
+}
+
+ecb_noinline ecb_cold
+static void
+evtimerfd_init (EV_P)
+{
+  if (!ev_is_active (&timerfd_w))
+    {
+      timerfd = timerfd_create (CLOCK_REALTIME, TFD_NONBLOCK | TFD_CLOEXEC);
+
+      if (timerfd >= 0)
+        {
+          fd_intern (timerfd); /* just to be sure */
+
+          ev_io_init (&timerfd_w, timerfdcb, timerfd, EV_READ);
+          ev_set_priority (&timerfd_w, EV_MINPRI);
+          ev_io_start (EV_A_ &timerfd_w);
+          ev_unref (EV_A); /* watcher should not keep loop alive */
+
+          /* (re-) arm timer */
+          timerfdcb (EV_A_ 0, 0);
+        }
+    }
+}
+
+#endif
+
+/*****************************************************************************/
+
 #if EV_USE_IOCP
 # include "ev_iocp.c"
 #endif
@@ -2716,6 +3095,12 @@ childcb (EV_P_ ev_signal *sw, int revents)
 #if EV_USE_EPOLL
 # include "ev_epoll.c"
 #endif
+#if EV_USE_LINUXAIO
+# include "ev_linuxaio.c"
+#endif
+#if EV_USE_IOURING
+# include "ev_iouring.c"
+#endif
 #if EV_USE_POLL
 # include "ev_poll.c"
 #endif
@@ -2753,11 +3138,13 @@ ev_supported_backends (void) EV_NOEXCEPT
 {
  unsigned int flags = 0;
 
-  if (EV_USE_PORT  ) flags |= EVBACKEND_PORT;
-  if (EV_USE_KQUEUE) flags |= EVBACKEND_KQUEUE;
-  if (EV_USE_EPOLL ) flags |= EVBACKEND_EPOLL;
-  if (EV_USE_POLL  ) flags |= EVBACKEND_POLL;
-  if (EV_USE_SELECT) flags |= EVBACKEND_SELECT;
+  if (EV_USE_PORT                                      ) flags |= EVBACKEND_PORT;
+  if (EV_USE_KQUEUE                                    ) flags |= EVBACKEND_KQUEUE;
+  if (EV_USE_EPOLL                                     ) flags |= EVBACKEND_EPOLL;
+  if (EV_USE_LINUXAIO                                  ) flags |= EVBACKEND_LINUXAIO;
+  if (EV_USE_IOURING && ev_linux_version () >= 0x050601) flags |= EVBACKEND_IOURING; /* 5.6.1+ */
+  if (EV_USE_POLL                                      ) flags |= EVBACKEND_POLL;
+  if (EV_USE_SELECT                                    ) flags |= EVBACKEND_SELECT;
 
  return flags;
 }
@@ -2782,6 +3169,15 @@ ev_recommended_backends (void) EV_NOEXCEPT
  flags &= ~EVBACKEND_POLL;   /* poll return value is unusable (http://forums.freebsd.org/archive/index.php/t-10270.html) */
 #endif
 
+  /* TODO: linuxaio is very experimental */
+#if !EV_RECOMMEND_LINUXAIO
+  flags &= ~EVBACKEND_LINUXAIO;
+#endif
+  /* TODO: linuxaio is super experimental */
+#if !EV_RECOMMEND_IOURING
+  flags &= ~EVBACKEND_IOURING;
+#endif
+
  return flags;
 }
 
@@ -2789,12 +3185,14 @@ ecb_cold
 unsigned int
 ev_embeddable_backends (void) EV_NOEXCEPT
 {
-  int flags = EVBACKEND_EPOLL | EVBACKEND_KQUEUE | EVBACKEND_PORT;
+  int flags = EVBACKEND_EPOLL | EVBACKEND_KQUEUE | EVBACKEND_PORT | EVBACKEND_IOURING;
 
  /* epoll embeddability broken on all linux versions up to at least 2.6.23 */
  if (ev_linux_version () < 0x020620) /* disable it on linux < 2.6.32 */
    flags &= ~EVBACKEND_EPOLL;
 
+  /* EVBACKEND_LINUXAIO is theoretically embeddable, but suffers from a performance overhead */
+
  return flags;
 }
 
@@ -2855,20 +3253,8 @@ ev_set_loop_release_cb (EV_P_ void (*release)(EV_P) EV_NOEXCEPT, void (*acquire)
 }
 #endif
 
-EV_INLINE struct ev_loop *
-ev_default_loop_uc_ (void) EV_NOEXCEPT
-{
-  return ev_default_loop_ptr;
-}
-
-EV_INLINE int
-ev_is_default_loop (EV_P) EV_NOEXCEPT
-{
-  return EV_A == EV_DEFAULT_UC;
-}
-
 /* initialise a loop structure, must be zero-initialised */
-noinline ecb_cold
+ecb_noinline ecb_cold
 static void
 loop_init (EV_P_ unsigned int flags) EV_NOEXCEPT
 {
@@ -2952,27 +3338,36 @@ loop_init (EV_P_ unsigned int flags) EV_NOEXCEPT
 #if EV_USE_SIGNALFD
      sigfd = flags & EVFLAG_SIGNALFD  ? -2 : -1;
 #endif
+#if EV_USE_TIMERFD
+      timerfd = flags & EVFLAG_NOTIMERFD ? -1 : -2;
+#endif
 
      if (!(flags & EVBACKEND_MASK))
        flags |= ev_recommended_backends ();
 
 #if EV_USE_IOCP
-      if (!backend && (flags & EVBACKEND_IOCP  )) backend = iocp_init   (EV_A_ flags);
+      if (!backend && (flags & EVBACKEND_IOCP    )) backend = iocp_init     (EV_A_ flags);
 #endif
 #if EV_USE_PORT
-      if (!backend && (flags & EVBACKEND_PORT  )) backend = port_init   (EV_A_ flags);
+      if (!backend && (flags & EVBACKEND_PORT    )) backend = port_init     (EV_A_ flags);
 #endif
 #if EV_USE_KQUEUE
-      if (!backend && (flags & EVBACKEND_KQUEUE)) backend = kqueue_init (EV_A_ flags);
+      if (!backend && (flags & EVBACKEND_KQUEUE  )) backend = kqueue_init   (EV_A_ flags);
+#endif
+#if EV_USE_IOURING
+      if (!backend && (flags & EVBACKEND_IOURING )) backend = iouring_init  (EV_A_ flags);
+#endif
+#if EV_USE_LINUXAIO
+      if (!backend && (flags & EVBACKEND_LINUXAIO)) backend = linuxaio_init (EV_A_ flags);
 #endif
 #if EV_USE_EPOLL
-      if (!backend && (flags & EVBACKEND_EPOLL )) backend = epoll_init  (EV_A_ flags);
+      if (!backend && (flags & EVBACKEND_EPOLL   )) backend = epoll_init    (EV_A_ flags);
 #endif
 #if EV_USE_POLL
-      if (!backend && (flags & EVBACKEND_POLL  )) backend = poll_init   (EV_A_ flags);
+      if (!backend && (flags & EVBACKEND_POLL    )) backend = poll_init     (EV_A_ flags);
 #endif
 #if EV_USE_SELECT
-      if (!backend && (flags & EVBACKEND_SELECT)) backend = select_init (EV_A_ flags);
+      if (!backend && (flags & EVBACKEND_SELECT  )) backend = select_init   (EV_A_ flags);
 #endif
 
      ev_prepare_init (&pending_w, pendingcb);
@@ -2984,6 +3379,18 @@ loop_init (EV_P_ unsigned int flags) EV_NOEXCEPT
    }
 }
 
+EV_INLINE struct ev_loop *
+ev_default_loop_uc_ (void) EV_NOEXCEPT
+{
+  return ev_default_loop_ptr;
+}
+
+EV_INLINE int
+ev_is_default_loop (EV_P) EV_NOEXCEPT
+{
+  return EV_A == EV_DEFAULT_UC;
+}
+
 /* free up a loop structure */
 ecb_cold
 void
@@ -2999,7 +3406,7 @@ ev_loop_destroy (EV_P)
 
 #if EV_CLEANUP_ENABLE
  /* queue cleanup watchers (and execute them) */
-  if (expect_false (cleanupcnt))
+  if (ecb_expect_false (cleanupcnt))
    {
      queue_events (EV_A_ (W *)cleanups, cleanupcnt, EV_CLEANUP);
      EV_INVOKE_PENDING;
@@ -3028,6 +3435,11 @@ ev_loop_destroy (EV_P)
    close (sigfd);
 #endif
 
+#if EV_USE_TIMERFD
+  if (ev_is_active (&timerfd_w))
+    close (timerfd);
+#endif
+
 #if EV_USE_INOTIFY
  if (fs_fd >= 0)
    close (fs_fd);
@@ -3037,22 +3449,28 @@ ev_loop_destroy (EV_P)
    close (backend_fd);
 
 #if EV_USE_IOCP
-  if (backend == EVBACKEND_IOCP  ) iocp_destroy   (EV_A);
+  if (backend == EVBACKEND_IOCP    ) iocp_destroy     (EV_A);
#endif
 #if EV_USE_PORT
-  if (backend == EVBACKEND_PORT  ) port_destroy   (EV_A);
+  if (backend == EVBACKEND_PORT    ) port_destroy     (EV_A);
 #endif
 #if EV_USE_KQUEUE
-  if (backend == EVBACKEND_KQUEUE) kqueue_destroy (EV_A);
+  if (backend == EVBACKEND_KQUEUE  ) kqueue_destroy   (EV_A);
+#endif
+#if EV_USE_IOURING
+  if (backend == EVBACKEND_IOURING ) iouring_destroy  (EV_A);
+#endif
+#if EV_USE_LINUXAIO
+  if (backend == EVBACKEND_LINUXAIO) linuxaio_destroy (EV_A);
 #endif
 #if EV_USE_EPOLL
-  if (backend == EVBACKEND_EPOLL ) epoll_destroy  (EV_A);
+  if (backend == EVBACKEND_EPOLL   ) epoll_destroy    (EV_A);
 #endif
 #if EV_USE_POLL
-  if (backend == EVBACKEND_POLL  ) poll_destroy   (EV_A);
+  if (backend == EVBACKEND_POLL    ) poll_destroy     (EV_A);
 #endif
 #if EV_USE_SELECT
-  if (backend == EVBACKEND_SELECT) select_destroy (EV_A);
+  if (backend == EVBACKEND_SELECT  ) select_destroy   (EV_A);
 #endif
 
  for (i = NUMPRI; i--; )
@@ -3104,34 +3522,62 @@ inline_size void
 loop_fork (EV_P)
 {
 #if EV_USE_PORT
-  if (backend == EVBACKEND_PORT  ) port_fork   (EV_A);
+  if (backend == EVBACKEND_PORT    ) port_fork     (EV_A);
 #endif
 #if EV_USE_KQUEUE
-  if (backend == EVBACKEND_KQUEUE) kqueue_fork (EV_A);
+  if (backend == EVBACKEND_KQUEUE  ) kqueue_fork   (EV_A);
+#endif
+#if EV_USE_IOURING
+  if (backend == EVBACKEND_IOURING ) iouring_fork  (EV_A);
+#endif
+#if EV_USE_LINUXAIO
+  if (backend == EVBACKEND_LINUXAIO) linuxaio_fork (EV_A);
 #endif
 #if EV_USE_EPOLL
-  if (backend == EVBACKEND_EPOLL ) epoll_fork  (EV_A);
+  if (backend == EVBACKEND_EPOLL   ) epoll_fork    (EV_A);
 #endif
 #if EV_USE_INOTIFY
  infy_fork (EV_A);
 #endif
 
-#if EV_SIGNAL_ENABLE || EV_ASYNC_ENABLE
-  if (ev_is_active (&pipe_w) && postfork != 2)
+  if (postfork != 2)
    {
-      /* pipe_write_wanted must be false now, so modifying fd vars should be safe */
+      #if EV_USE_SIGNALFD
+        /* surprisingly, nothing needs to be done for signalfd, accoridng to docs, it does the right thing on fork */
+      #endif
 
-      ev_ref (EV_A);
-      ev_io_stop (EV_A_ &pipe_w);
+      #if EV_USE_TIMERFD
+        if (ev_is_active (&timerfd_w))
+          {
+            ev_ref (EV_A);
+            ev_io_stop (EV_A_ &timerfd_w);
+
+            close (timerfd);
+            timerfd = -2;
 
-      if (evpipe [0] >= 0)
-        EV_WIN32_CLOSE_FD (evpipe [0]);
+            evtimerfd_init (EV_A);
+            /* reschedule periodics, in case we missed something */
+            ev_feed_event (EV_A_ &timerfd_w, EV_CUSTOM);
+          }
+      #endif
+
+      #if EV_SIGNAL_ENABLE || EV_ASYNC_ENABLE
+        if (ev_is_active (&pipe_w))
+          {
+            /* pipe_write_wanted must be false now, so modifying fd vars should be safe */
 
-      evpipe_init (EV_A);
-      /* iterate over everything, in case we missed something before */
-      ev_feed_event (EV_A_ &pipe_w, EV_CUSTOM);
+            ev_ref (EV_A);
+            ev_io_stop (EV_A_ &pipe_w);
+
+            if (evpipe [0] >= 0)
+              EV_WIN32_CLOSE_FD (evpipe [0]);
+
+            evpipe_init (EV_A);
+            /* iterate over everything, in case we missed something before */
+            ev_feed_event (EV_A_ &pipe_w, EV_CUSTOM);
+          }
+      #endif
    }
-#endif
 
  postfork = 0;
 }
@@ -3157,7 +3603,7 @@ ev_loop_new (unsigned int flags) EV_NOEXCEPT
 #endif /* multiplicity */
 
 #if EV_VERIFY
-noinline ecb_cold
+ecb_noinline ecb_cold
 static void
 verify_watcher (EV_P_ W w)
 {
@@ -3167,7 +3613,7 @@ verify_watcher (EV_P_ W w)
    assert (("libev: pending watcher not on pending queue", pendings [ABSPRI (w)][w->pending - 1].w == w));
 }
 
-noinline ecb_cold
+ecb_noinline ecb_cold
 static void
 verify_heap (EV_P_ ANHE *heap, int N)
 {
@@ -3183,7 +3629,7 @@ verify_heap (EV_P_ ANHE *heap, int N)
    }
 }
 
-noinline ecb_cold
+ecb_noinline ecb_cold
 static void
 array_verify (EV_P_ W *ws, int cnt)
 {
@@ -3342,7 +3788,7 @@ ev_pending_count (EV_P) EV_NOEXCEPT
  return count;
 }
 
-noinline
+ecb_noinline
 void
 ev_invoke_pending (EV_P)
 {
@@ -3371,7 +3817,7 @@ ev_invoke_pending (EV_P)
 inline_size void
 idle_reify (EV_P)
 {
-  if (expect_false (idleall))
+  if (ecb_expect_false (idleall))
    {
      int pri;
 
@@ -3411,7 +3857,7 @@ timers_reify (EV_P)
          if (ev_at (w) < mn_now)
            ev_at (w) = mn_now;
 
-          assert (("libev: negative ev_timer repeat value found while processing timers", w->repeat > 0.));
+          assert (("libev: negative ev_timer repeat value found while processing timers", w->repeat > EV_TS_CONST (0.)));
 
          ANHE_at_cache (timers [HEAP0]);
          downheap (timers, timercnt, HEAP0);
@@ -3430,7 +3876,7 @@ timers_reify (EV_P)
 
 #if EV_PERIODIC_ENABLE
 
-noinline
+ecb_noinline
 static void
 periodic_recalc (EV_P_ ev_periodic *w)
 {
@@ -3443,7 +3889,7 @@ periodic_recalc (EV_P_ ev_periodic *w)
      ev_tstamp nat = at + w->interval;
 
     /* when resolution fails us, we use ev_rt_now */
-      if (expect_false (nat == at))
+      if (ecb_expect_false (nat == at))
       {
         at = ev_rt_now;
        break;
@@ -3499,7 +3945,7 @@ periodics_reify (EV_P)
 
 /* simply recalculate all periodics */
 /* TODO: maybe ensure that at least one event happens when jumping forward? */
-noinline ecb_cold
+ecb_noinline ecb_cold
 static void
 periodics_reschedule (EV_P)
 {
@@ -3523,7 +3969,7 @@ periodics_reschedule (EV_P)
 #endif
 
 /* adjust all timers by a given offset */
-noinline ecb_cold
+ecb_noinline ecb_cold
 static void
 timers_reschedule (EV_P_ ev_tstamp adjust)
 {
@@ -3543,7 +3989,7 @@ inline_speed void
 time_update (EV_P_ ev_tstamp max_block)
 {
 #if EV_USE_MONOTONIC
-  if (expect_true (have_monotonic))
+  if (ecb_expect_true (have_monotonic))
    {
      int i;
      ev_tstamp odiff = rtmn_diff;
@@ -3552,7 +3998,7 @@ time_update (EV_P_ ev_tstamp max_block)
 
     /* only fetch the realtime clock every 0.5*MIN_TIMEJUMP seconds */
     /* interpolate in the meantime */
-      if (expect_true (mn_now - now_floor < MIN_TIMEJUMP * .5))
+      if (ecb_expect_true (mn_now - now_floor < EV_TS_CONST (MIN_TIMEJUMP * .5)))
       {
         ev_rt_now = rtmn_diff + mn_now;
        return;
@@ -3576,7 +4022,7 @@ time_update (EV_P_ ev_tstamp max_block)
 
        diff = odiff - rtmn_diff;
 
-          if (expect_true ((diff < 0. ? -diff : diff) < MIN_TIMEJUMP))
+          if (ecb_expect_true ((diff < EV_TS_CONST (0.) ? -diff : diff) < EV_TS_CONST (MIN_TIMEJUMP)))
          return; /* all is well */
 
        ev_rt_now = ev_time ();
@@ -3595,7 +4041,7 @@ time_update (EV_P_ ev_tstamp max_block)
   {
     ev_rt_now = ev_time ();
 
-      if (expect_false (mn_now > ev_rt_now || ev_rt_now > mn_now + max_block + MIN_TIMEJUMP))
+      if (ecb_expect_false (mn_now > ev_rt_now || ev_rt_now > mn_now + max_block + EV_TS_CONST (MIN_TIMEJUMP)))
       {
         /* adjust timers. this is easy, as the offset is the same for all of them */
         timers_reschedule (EV_A_ ev_rt_now - mn_now);
@@ -3628,8 +4074,8 @@ ev_run (EV_P_ int flags)
 #endif
 
 #ifndef _WIN32
-      if (expect_false (curpid)) /* penalise the forking check even more */
-        if (expect_false (getpid () != curpid))
+      if (ecb_expect_false (curpid)) /* penalise the forking check even more */
+        if (ecb_expect_false (getpid () != curpid))
         {
           curpid = getpid ();
           postfork = 1;
@@ -3638,7 +4084,7 @@ ev_run (EV_P_ int flags)
 
 #if EV_FORK_ENABLE
      /* we might have forked, so queue fork handlers */
-      if (expect_false (postfork))
+      if (ecb_expect_false (postfork))
       if (forkcnt)
         {
           queue_events (EV_A_ (W *)forks, forkcnt, EV_FORK);
@@ -3648,18 +4094,18 @@ ev_run (EV_P_ int flags)
 
 #if EV_PREPARE_ENABLE
      /* queue prepare watchers (and execute them) */
-      if (expect_false (preparecnt))
+      if (ecb_expect_false (preparecnt))
       {
         queue_events (EV_A_ (W *)prepares, preparecnt, EV_PREPARE);
         EV_INVOKE_PENDING;
       }
 #endif
 
-      if (expect_false (loop_done))
+      if (ecb_expect_false (loop_done))
       break;
 
     /* we might have forked, so reify kernel state if necessary */
-      if (expect_false (postfork))
+      if (ecb_expect_false (postfork))
       loop_fork (EV_A);
 
    /* update fd-related kernel structures */
@@ -3674,16 +4120,28 @@ ev_run (EV_P_ int flags)
      ev_tstamp prev_mn_now = mn_now;
 
     /* update time to cancel out callback processing overhead */
-        time_update (EV_A_ 1e100);
+        time_update (EV_A_ EV_TS_CONST (EV_TSTAMP_HUGE));
 
    /* from now on, we want a pipe-wake-up */
    pipe_write_wanted = 1;
 
   ECB_MEMORY_FENCE; /* make sure pipe_write_wanted is visible before we check for potential skips */
 
-        if (expect_true (!(flags & EVRUN_NOWAIT || idleall || !activecnt || pipe_write_skipped)))
+        if (ecb_expect_true (!(flags & EVRUN_NOWAIT || idleall || !activecnt || pipe_write_skipped)))
        {
-            waittime = MAX_BLOCKTIME;
+            waittime = EV_TS_CONST (MAX_BLOCKTIME);
+
+#if EV_USE_TIMERFD
+            /* sleep a lot longer when we can reliably detect timejumps */
+            if (ecb_expect_true (timerfd >= 0))
+              waittime = EV_TS_CONST (MAX_BLOCKTIME2);
+#endif
+#if !EV_PERIODIC_ENABLE
+            /* without periodics but with monotonic clock there is no need */
+            /* for any time jump detection, so sleep longer */
+            if (ecb_expect_true (have_monotonic))
+              waittime = EV_TS_CONST (MAX_BLOCKTIME2);
+#endif
 
           if (timercnt)
             {
@@ -3700,23 +4158,28 @@ ev_run (EV_P_ int flags)
 #endif
 
          /* don't let timeouts decrease the waittime below timeout_blocktime */
-          if (expect_false (waittime < timeout_blocktime))
+          if (ecb_expect_false (waittime < timeout_blocktime))
            waittime = timeout_blocktime;
 
-          /* at this point, we NEED to wait, so we have to ensure */
-          /* to pass a minimum nonzero value to the backend */
-          if (expect_false (waittime < backend_mintime))
-            waittime = backend_mintime;
+          /* now there are two more special cases left, either we have
+           * already-expired timers, so we should not sleep, or we have timers
+           * that expire very soon, in which case we need to wait for a minimum
+           * amount of time for some event loop backends.
+           */
+          if (ecb_expect_false (waittime < backend_mintime))
+            waittime = waittime <= EV_TS_CONST (0.)
+                       ? EV_TS_CONST (0.)
+                       : backend_mintime;
 
          /* extra check because io_blocktime is commonly 0 */
-          if (expect_false (io_blocktime))
+          if (ecb_expect_false (io_blocktime))
           {
             sleeptime = io_blocktime - (mn_now - prev_mn_now);
 
            if (sleeptime > waittime - backend_mintime)
              sleeptime = waittime - backend_mintime;
 
-              if (expect_true (sleeptime > 0.))
+              if (ecb_expect_true (sleeptime > EV_TS_CONST (0.)))
              {
               ev_sleep (sleeptime);
               waittime -= sleeptime;
@@ -3740,7 +4203,6 @@ ev_run (EV_P_ int flags)
           ev_feed_event (EV_A_ &pipe_w, EV_CUSTOM);
         }
 
-        /* update ev_rt_now, do magic */
       time_update (EV_A_ waittime + sleeptime);
     }
 
@@ -3758,13 +4220,13 @@ ev_run (EV_P_ int flags)
 
 #if EV_CHECK_ENABLE
      /* queue check watchers, to be executed first */
-      if (expect_false (checkcnt))
+      if (ecb_expect_false (checkcnt))
       queue_events (EV_A_ (W *)checks, checkcnt, EV_CHECK);
 #endif
 
      EV_INVOKE_PENDING;
   }
-  while (expect_true (
+  while (ecb_expect_true (
   activecnt
   && !loop_done
   && !(flags & (EVRUN_ONCE | EVRUN_NOWAIT))
@@ -3798,22 +4260,10 @@ ev_unref (EV_P) EV_NOEXCEPT
  --activecnt;
 }
 
-int
-ev_active_cnt (EV_P) EV_NOEXCEPT
-{
-  return activecnt;
-}
-
 void
 ev_now_update (EV_P) EV_NOEXCEPT
 {
-  time_update (EV_A_ 1e100);
-}
-
-void
-ev_now_update_if_cheap (EV_P) EV_NOEXCEPT
-{
-  if (have_cheap_timer) time_update (EV_A_ 1e100);
+  time_update (EV_A_ EV_TSTAMP_HUGE);
 }
 
 void
@@ -3850,7 +4300,7 @@ wlist_del (WL *head, WL elem)
 {
  while (*head)
   {
-      if (expect_true (*head == elem))
+      if (ecb_expect_true (*head == elem))
      {
       *head = elem->next;
       break;
@@ -3877,7 +4327,7 @@ ev_clear_pending (EV_P_ void *w) EV_NOEXCEPT
  W w_ = (W)w;
  int pending = w_->pending;
 
-  if (expect_true (pending))
+  if (ecb_expect_true (pending))
  {
   ANPENDING *p = pendings [ABSPRI (w_)] + pending - 1;
   p->w = (W)&pending_w;
@@ -3914,43 +4364,49 @@ ev_stop (EV_P_ W w)
 
 /*****************************************************************************/
 
-noinline
+ecb_noinline
 void
 ev_io_start (EV_P_ ev_io *w) EV_NOEXCEPT
 {
  int fd = w->fd;
 
-  if (expect_false (ev_is_active (w)))
+  if (ecb_expect_false (ev_is_active (w)))
  return;
 
  assert (("libev: ev_io_start called with negative fd", fd >= 0));
 assert (("libev: ev_io_start called with illegal event mask", !(w->events & ~(EV__IOFDSET | EV_READ | EV_WRITE))));
 
+#if EV_VERIFY >= 2
+  assert (("libev: ev_io_start called on watcher with invalid fd", fd_valid (fd)));
+#endif
+
 EV_FREQUENT_CHECK;
 
 ev_start (EV_A_ (W)w, 1);
-  array_needsize (ANFD, anfds, anfdmax, fd + 1, array_init_zero);
+  array_needsize (ANFD, anfds, anfdmax, fd + 1, array_needsize_zerofill);
 wlist_add (&anfds[fd].head, (WL)w);
 
 /* common bug, apparently */
 assert (("libev: ev_io_start called with corrupted watcher", ((WL)w)->next != (WL)w));
 
-  fd_change (EV_A_ fd, w->events & (EV__IOFDSET | EV_ANFD_REIFY));
+  fd_change (EV_A_ fd, w->events & EV__IOFDSET | EV_ANFD_REIFY);
 w->events &= ~EV__IOFDSET;
 
 EV_FREQUENT_CHECK;
}
 
-noinline
+ecb_noinline
 void
 ev_io_stop (EV_P_ ev_io *w) EV_NOEXCEPT
 {
 clear_pending (EV_A_ (W)w);
-  if (expect_false (!ev_is_active (w)))
+  if (ecb_expect_false (!ev_is_active (w)))
 return;
 
 assert (("libev: ev_io_stop called with illegal fd (must stay constant after start!)", w->fd >= 0 && w->fd < anfdmax));
 
+#if EV_VERIFY >= 2
+  assert (("libev: ev_io_stop called on watcher with invalid fd", fd_valid (w->fd)));
+#endif
+
 EV_FREQUENT_CHECK;
 
 wlist_del (&anfds[w->fd].head, (WL)w);
@@ -3961,11 +4417,11 @@ ev_io_stop (EV_P_ ev_io *w) EV_NOEXCEPT
 EV_FREQUENT_CHECK;
}
 
-noinline
+ecb_noinline
 void
 ev_timer_start (EV_P_ ev_timer *w) EV_NOEXCEPT
 {
-  if (expect_false (ev_is_active (w)))
+  if (ecb_expect_false (ev_is_active (w)))
 return;
 
 ev_at (w) += mn_now;
@@ -3976,7 +4432,7 @@ ev_timer_start (EV_P_ ev_timer *w) EV_NOEXCEPT
 
 ++timercnt;
 ev_start (EV_A_ (W)w, timercnt + HEAP0 - 1);
-  array_needsize (ANHE, timers, timermax, ev_active (w) + 1, EMPTY2);
+  array_needsize (ANHE, timers, timermax, ev_active (w) + 1, array_needsize_noinit);
 ANHE_w (timers [ev_active (w)]) = (WT)w;
 ANHE_at_cache (timers [ev_active (w)]);
 upheap (timers, ev_active (w));
@@ -3986,12 +4442,12 @@ ev_timer_start (EV_P_ ev_timer *w) EV_NOEXCEPT
 /*assert (("libev: internal timer heap corruption", timers [ev_active (w)] == (WT)w));*/
}
 
-noinline
+ecb_noinline
 void
 ev_timer_stop (EV_P_ ev_timer *w) EV_NOEXCEPT
 {
 clear_pending (EV_A_ (W)w);
-  if (expect_false (!ev_is_active (w)))
+  if (ecb_expect_false (!ev_is_active (w)))
 return;
 
 EV_FREQUENT_CHECK;
@@ -4003,7 +4459,7 @@ ev_timer_stop (EV_P_ ev_timer *w) EV_NOEXCEPT
 
 --timercnt;
 
-  if (expect_true (active < timercnt + HEAP0))
+  if (ecb_expect_true (active < timercnt + HEAP0))
 {
  timers [active] = timers [timercnt + HEAP0];
  adjustheap (timers, timercnt, active);
@@ -4017,7 +4473,7 @@ ev_timer_stop (EV_P_ ev_timer *w) EV_NOEXCEPT
 EV_FREQUENT_CHECK;
}
 
-noinline
+ecb_noinline
 void
 ev_timer_again (EV_P_ ev_timer *w) EV_NOEXCEPT
 {
@@ -4048,17 +4504,22 @@ ev_timer_again (EV_P_ ev_timer *w) EV_NOEXCEPT
 ev_tstamp
 ev_timer_remaining (EV_P_ ev_timer *w) EV_NOEXCEPT
 {
-  return ev_at (w) - (ev_is_active (w) ? mn_now : 0.);
+  return ev_at (w) - (ev_is_active (w) ? mn_now : EV_TS_CONST (0.));
}
 
 #if EV_PERIODIC_ENABLE
 
-noinline
+ecb_noinline
 void
 ev_periodic_start (EV_P_ ev_periodic *w) EV_NOEXCEPT
 {
-  if (expect_false (ev_is_active (w)))
+  if (ecb_expect_false (ev_is_active (w)))
 return;
 
+#if EV_USE_TIMERFD
+  if (timerfd == -2)
+    evtimerfd_init (EV_A);
+#endif
+
 if (w->reschedule_cb)
  ev_at (w) = w->reschedule_cb (w, ev_rt_now);
 else if (w->interval)
@@ -4073,7 +4534,7 @@ ev_periodic_start (EV_P_ ev_periodic *w) EV_NOEXCEPT
 
 ++periodiccnt;
 ev_start (EV_A_ (W)w, periodiccnt + HEAP0 - 1);
-  array_needsize (ANHE, periodics, periodicmax, ev_active (w) + 1, EMPTY2);
+  array_needsize (ANHE, periodics, periodicmax, ev_active (w) + 1, array_needsize_noinit);
 ANHE_w (periodics [ev_active (w)]) = (WT)w;
 ANHE_at_cache (periodics [ev_active (w)]);
 upheap (periodics, ev_active (w));
@@ -4083,12 +4544,12 @@ ev_periodic_start (EV_P_ ev_periodic *w) EV_NOEXCEPT
 /*assert (("libev: internal periodic heap corruption", ANHE_w (periodics [ev_active (w)]) == (WT)w));*/
}
 
-noinline
+ecb_noinline
 void
 ev_periodic_stop (EV_P_ ev_periodic *w) EV_NOEXCEPT
 {
 clear_pending (EV_A_ (W)w);
-  if (expect_false (!ev_is_active (w)))
+  if (ecb_expect_false (!ev_is_active (w)))
 return;
 
 EV_FREQUENT_CHECK;
@@ -4100,7 +4561,7 @@ ev_periodic_stop (EV_P_ ev_periodic *w) EV_NOEXCEPT
 
 --periodiccnt;
 
-  if (expect_true (active < periodiccnt + HEAP0))
+  if (ecb_expect_true (active < periodiccnt + HEAP0))
 {
  periodics [active] = periodics [periodiccnt + HEAP0];
  adjustheap (periodics, periodiccnt, active);
@@ -4112,7 +4573,7 @@ ev_periodic_stop (EV_P_ ev_periodic *w) EV_NOEXCEPT
 EV_FREQUENT_CHECK;
}
 
-noinline
+ecb_noinline
 void
 ev_periodic_again (EV_P_ ev_periodic *w) EV_NOEXCEPT
 {
@@ -4128,11 +4589,11 @@ ev_periodic_again (EV_P_ ev_periodic *w) EV_NOEXCEPT
 
 #if EV_SIGNAL_ENABLE
 
-noinline
+ecb_noinline
 void
 ev_signal_start (EV_P_ ev_signal *w) EV_NOEXCEPT
 {
-  if (expect_false (ev_is_active (w)))
+  if (ecb_expect_false (ev_is_active (w)))
 return;
 
 assert (("libev: ev_signal_start called with illegal signal number", w->signum > 0 &&
w->signum < EV_NSIG)); @@ -4211,12 +4672,12 @@ ev_signal_start (EV_P_ ev_signal *w) EV_NOEXCEPT EV_FREQUENT_CHECK; } -noinline +ecb_noinline void ev_signal_stop (EV_P_ ev_signal *w) EV_NOEXCEPT { clear_pending (EV_A_ (W)w); - if (expect_false (!ev_is_active (w))) + if (ecb_expect_false (!ev_is_active (w))) return; EV_FREQUENT_CHECK; @@ -4259,7 +4720,7 @@ ev_child_start (EV_P_ ev_child *w) EV_NOEXCEPT #if EV_MULTIPLICITY assert (("libev: child watchers are only supported in the default loop", loop == ev_default_loop_ptr)); #endif - if (expect_false (ev_is_active (w))) + if (ecb_expect_false (ev_is_active (w))) return; EV_FREQUENT_CHECK; @@ -4274,7 +4735,7 @@ void ev_child_stop (EV_P_ ev_child *w) EV_NOEXCEPT { clear_pending (EV_A_ (W)w); - if (expect_false (!ev_is_active (w))) + if (ecb_expect_false (!ev_is_active (w))) return; EV_FREQUENT_CHECK; @@ -4298,14 +4759,14 @@ ev_child_stop (EV_P_ ev_child *w) EV_NOEXCEPT #define NFS_STAT_INTERVAL 30.1074891 /* for filesystems potentially failing inotify */ #define MIN_STAT_INTERVAL 0.1074891 -noinline static void stat_timer_cb (EV_P_ ev_timer *w_, int revents); +ecb_noinline static void stat_timer_cb (EV_P_ ev_timer *w_, int revents); #if EV_USE_INOTIFY /* the * 2 is to allow for alignment padding, which for some reason is >> 8 */ # define EV_INOTIFY_BUFSIZE (sizeof (struct inotify_event) * 2 + NAME_MAX) -noinline +ecb_noinline static void infy_add (EV_P_ ev_stat *w) { @@ -4380,7 +4841,7 @@ infy_add (EV_P_ ev_stat *w) if (ev_is_active (&w->timer)) ev_unref (EV_A); } -noinline +ecb_noinline static void infy_del (EV_P_ ev_stat *w) { @@ -4398,7 +4859,7 @@ infy_del (EV_P_ ev_stat *w) inotify_rm_watch (fs_fd, wd); } -noinline +ecb_noinline static void infy_wd (EV_P_ int slot, int wd, struct inotify_event *ev) { @@ -4554,7 +5015,7 @@ ev_stat_stat (EV_P_ ev_stat *w) EV_NOEXCEPT w->attr.st_nlink = 1; } -noinline +ecb_noinline static void stat_timer_cb (EV_P_ ev_timer *w_, int revents) { @@ -4598,7 +5059,7 @@ stat_timer_cb (EV_P_ ev_timer *w_, int revents) void ev_stat_start (EV_P_ ev_stat *w) EV_NOEXCEPT { - if (expect_false (ev_is_active (w))) + if (ecb_expect_false (ev_is_active (w))) return; ev_stat_stat (EV_A_ w); @@ -4630,7 +5091,7 @@ void ev_stat_stop (EV_P_ ev_stat *w) EV_NOEXCEPT { clear_pending (EV_A_ (W)w); - if (expect_false (!ev_is_active (w))) + if (ecb_expect_false (!ev_is_active (w))) return; EV_FREQUENT_CHECK; @@ -4655,7 +5116,7 @@ ev_stat_stop (EV_P_ ev_stat *w) EV_NOEXCEPT void ev_idle_start (EV_P_ ev_idle *w) EV_NOEXCEPT { - if (expect_false (ev_is_active (w))) + if (ecb_expect_false (ev_is_active (w))) return; pri_adjust (EV_A_ (W)w); @@ -4668,7 +5129,7 @@ ev_idle_start (EV_P_ ev_idle *w) EV_NOEXCEPT ++idleall; ev_start (EV_A_ (W)w, active); - array_needsize (ev_idle *, idles [ABSPRI (w)], idlemax [ABSPRI (w)], active, EMPTY2); + array_needsize (ev_idle *, idles [ABSPRI (w)], idlemax [ABSPRI (w)], active, array_needsize_noinit); idles [ABSPRI (w)][active - 1] = w; } @@ -4679,7 +5140,7 @@ void ev_idle_stop (EV_P_ ev_idle *w) EV_NOEXCEPT { clear_pending (EV_A_ (W)w); - if (expect_false (!ev_is_active (w))) + if (ecb_expect_false (!ev_is_active (w))) return; EV_FREQUENT_CHECK; @@ -4702,13 +5163,13 @@ ev_idle_stop (EV_P_ ev_idle *w) EV_NOEXCEPT void ev_prepare_start (EV_P_ ev_prepare *w) EV_NOEXCEPT { - if (expect_false (ev_is_active (w))) + if (ecb_expect_false (ev_is_active (w))) return; EV_FREQUENT_CHECK; ev_start (EV_A_ (W)w, ++preparecnt); - array_needsize (ev_prepare *, prepares, preparemax, preparecnt, EMPTY2); + array_needsize 
(ev_prepare *, prepares, preparemax, preparecnt, array_needsize_noinit); prepares [preparecnt - 1] = w; EV_FREQUENT_CHECK; @@ -4718,7 +5179,7 @@ void ev_prepare_stop (EV_P_ ev_prepare *w) EV_NOEXCEPT { clear_pending (EV_A_ (W)w); - if (expect_false (!ev_is_active (w))) + if (ecb_expect_false (!ev_is_active (w))) return; EV_FREQUENT_CHECK; @@ -4740,13 +5201,13 @@ ev_prepare_stop (EV_P_ ev_prepare *w) EV_NOEXCEPT void ev_check_start (EV_P_ ev_check *w) EV_NOEXCEPT { - if (expect_false (ev_is_active (w))) + if (ecb_expect_false (ev_is_active (w))) return; EV_FREQUENT_CHECK; ev_start (EV_A_ (W)w, ++checkcnt); - array_needsize (ev_check *, checks, checkmax, checkcnt, EMPTY2); + array_needsize (ev_check *, checks, checkmax, checkcnt, array_needsize_noinit); checks [checkcnt - 1] = w; EV_FREQUENT_CHECK; @@ -4756,7 +5217,7 @@ void ev_check_stop (EV_P_ ev_check *w) EV_NOEXCEPT { clear_pending (EV_A_ (W)w); - if (expect_false (!ev_is_active (w))) + if (ecb_expect_false (!ev_is_active (w))) return; EV_FREQUENT_CHECK; @@ -4775,7 +5236,7 @@ ev_check_stop (EV_P_ ev_check *w) EV_NOEXCEPT #endif #if EV_EMBED_ENABLE -noinline +ecb_noinline void ev_embed_sweep (EV_P_ ev_embed *w) EV_NOEXCEPT { @@ -4809,6 +5270,7 @@ embed_prepare_cb (EV_P_ ev_prepare *prepare, int revents) } } +#if EV_FORK_ENABLE static void embed_fork_cb (EV_P_ ev_fork *fork_w, int revents) { @@ -4825,6 +5287,7 @@ embed_fork_cb (EV_P_ ev_fork *fork_w, int revents) ev_embed_start (EV_A_ w); } +#endif #if 0 static void @@ -4837,7 +5300,7 @@ embed_idle_cb (EV_P_ ev_idle *idle, int revents) void ev_embed_start (EV_P_ ev_embed *w) EV_NOEXCEPT { - if (expect_false (ev_is_active (w))) + if (ecb_expect_false (ev_is_active (w))) return; { @@ -4855,8 +5318,10 @@ ev_embed_start (EV_P_ ev_embed *w) EV_NOEXCEPT ev_set_priority (&w->prepare, EV_MINPRI); ev_prepare_start (EV_A_ &w->prepare); +#if EV_FORK_ENABLE ev_fork_init (&w->fork, embed_fork_cb); ev_fork_start (EV_A_ &w->fork); +#endif /*ev_idle_init (&w->idle, e,bed_idle_cb);*/ @@ -4869,14 +5334,16 @@ void ev_embed_stop (EV_P_ ev_embed *w) EV_NOEXCEPT { clear_pending (EV_A_ (W)w); - if (expect_false (!ev_is_active (w))) + if (ecb_expect_false (!ev_is_active (w))) return; EV_FREQUENT_CHECK; ev_io_stop (EV_A_ &w->io); ev_prepare_stop (EV_A_ &w->prepare); +#if EV_FORK_ENABLE ev_fork_stop (EV_A_ &w->fork); +#endif ev_stop (EV_A_ (W)w); @@ -4888,13 +5355,13 @@ ev_embed_stop (EV_P_ ev_embed *w) EV_NOEXCEPT void ev_fork_start (EV_P_ ev_fork *w) EV_NOEXCEPT { - if (expect_false (ev_is_active (w))) + if (ecb_expect_false (ev_is_active (w))) return; EV_FREQUENT_CHECK; ev_start (EV_A_ (W)w, ++forkcnt); - array_needsize (ev_fork *, forks, forkmax, forkcnt, EMPTY2); + array_needsize (ev_fork *, forks, forkmax, forkcnt, array_needsize_noinit); forks [forkcnt - 1] = w; EV_FREQUENT_CHECK; @@ -4904,7 +5371,7 @@ void ev_fork_stop (EV_P_ ev_fork *w) EV_NOEXCEPT { clear_pending (EV_A_ (W)w); - if (expect_false (!ev_is_active (w))) + if (ecb_expect_false (!ev_is_active (w))) return; EV_FREQUENT_CHECK; @@ -4926,13 +5393,13 @@ ev_fork_stop (EV_P_ ev_fork *w) EV_NOEXCEPT void ev_cleanup_start (EV_P_ ev_cleanup *w) EV_NOEXCEPT { - if (expect_false (ev_is_active (w))) + if (ecb_expect_false (ev_is_active (w))) return; EV_FREQUENT_CHECK; ev_start (EV_A_ (W)w, ++cleanupcnt); - array_needsize (ev_cleanup *, cleanups, cleanupmax, cleanupcnt, EMPTY2); + array_needsize (ev_cleanup *, cleanups, cleanupmax, cleanupcnt, array_needsize_noinit); cleanups [cleanupcnt - 1] = w; /* cleanup watchers should never keep a refcount on the loop 
*/ @@ -4944,7 +5411,7 @@ void ev_cleanup_stop (EV_P_ ev_cleanup *w) EV_NOEXCEPT { clear_pending (EV_A_ (W)w); - if (expect_false (!ev_is_active (w))) + if (ecb_expect_false (!ev_is_active (w))) return; EV_FREQUENT_CHECK; @@ -4967,7 +5434,7 @@ ev_cleanup_stop (EV_P_ ev_cleanup *w) EV_NOEXCEPT void ev_async_start (EV_P_ ev_async *w) EV_NOEXCEPT { - if (expect_false (ev_is_active (w))) + if (ecb_expect_false (ev_is_active (w))) return; w->sent = 0; @@ -4977,7 +5444,7 @@ ev_async_start (EV_P_ ev_async *w) EV_NOEXCEPT EV_FREQUENT_CHECK; ev_start (EV_A_ (W)w, ++asynccnt); - array_needsize (ev_async *, asyncs, asyncmax, asynccnt, EMPTY2); + array_needsize (ev_async *, asyncs, asyncmax, asynccnt, array_needsize_noinit); asyncs [asynccnt - 1] = w; EV_FREQUENT_CHECK; @@ -4987,7 +5454,7 @@ void ev_async_stop (EV_P_ ev_async *w) EV_NOEXCEPT { clear_pending (EV_A_ (W)w); - if (expect_false (!ev_is_active (w))) + if (ecb_expect_false (!ev_is_active (w))) return; EV_FREQUENT_CHECK; @@ -5056,12 +5523,6 @@ ev_once (EV_P_ int fd, int events, ev_tstamp timeout, void (*cb)(int revents, vo { struct ev_once *once = (struct ev_once *)ev_malloc (sizeof (struct ev_once)); - if (expect_false (!once)) - { - cb (EV_ERROR | EV_READ | EV_WRITE | EV_TIMER, arg); - return; - } - once->cb = cb; once->arg = arg; @@ -5079,7 +5540,6 @@ ev_once (EV_P_ int fd, int events, ev_tstamp timeout, void (*cb)(int revents, vo ev_timer_start (EV_A_ &once->to); } } - /*****************************************************************************/ #if EV_WALK_ENABLE @@ -5192,8 +5652,15 @@ ev_walk (EV_P_ int types, void (*cb)(EV_P_ int type, void *w)) EV_NOEXCEPT wl = wn; } #endif +/* EV_STAT 0x00001000 /* stat data changed */ +/* EV_EMBED 0x00010000 /* embedded event loop needs sweep */ } #endif +void +ev_now_update_if_cheap (EV_P) EV_NOEXCEPT +{ + if (have_cheap_timer) time_update (EV_A_ 1e100); +} #if EV_MULTIPLICITY #include "ev_wrap.h" diff --git a/contrib/libev/ev.h b/contrib/libev/ev.h index 9d3e4f2fd..7135a08a5 100644 --- a/contrib/libev/ev.h +++ b/contrib/libev/ev.h @@ -1,7 +1,7 @@ /* * libev native API header * - * Copyright (c) 2007-2018 Marc Alexander Lehmann <libev@schmorp.de> + * Copyright (c) 2007-2020 Marc Alexander Lehmann <libev@schmorp.de> * All rights reserved. * * Redistribution and use in source and binary forms, with or without modifica- @@ -151,7 +151,10 @@ EV_CPP(extern "C" {) /*****************************************************************************/ -typedef double ev_tstamp; +#ifndef EV_TSTAMP_T +# define EV_TSTAMP_T double +#endif +typedef EV_TSTAMP_T ev_tstamp; #include <string.h> /* for memmove */ @@ -212,7 +215,7 @@ struct ev_loop; /*****************************************************************************/ #define EV_VERSION_MAJOR 4 -#define EV_VERSION_MINOR 25 +#define EV_VERSION_MINOR 33 /* eventmask, revents, events... 
*/ enum { @@ -389,14 +392,12 @@ typedef struct ev_stat } ev_stat; #endif -#if EV_IDLE_ENABLE /* invoked when the nothing else needs to be done, keeps the process from blocking */ /* revent EV_IDLE */ typedef struct ev_idle { EV_WATCHER (ev_idle) } ev_idle; -#endif /* invoked for each run of the mainloop, just before the blocking call */ /* you can still change events in any way you like */ @@ -413,23 +414,19 @@ typedef struct ev_check EV_WATCHER (ev_check) } ev_check; -#if EV_FORK_ENABLE /* the callback gets invoked before check in the child process when a fork was detected */ /* revent EV_FORK */ typedef struct ev_fork { EV_WATCHER (ev_fork) } ev_fork; -#endif -#if EV_CLEANUP_ENABLE /* is invoked just before the loop gets destroyed */ /* revent EV_CLEANUP */ typedef struct ev_cleanup { EV_WATCHER (ev_cleanup) } ev_cleanup; -#endif #if EV_EMBED_ENABLE /* used to embed an event loop inside another */ @@ -439,16 +436,18 @@ typedef struct ev_embed EV_WATCHER (ev_embed) struct ev_loop *other; /* ro */ +#undef EV_IO_ENABLE +#define EV_IO_ENABLE 1 ev_io io; /* private */ +#undef EV_PREPARE_ENABLE +#define EV_PREPARE_ENABLE 1 ev_prepare prepare; /* private */ ev_check check; /* unused */ ev_timer timer; /* unused */ ev_periodic periodic; /* unused */ ev_idle idle; /* unused */ ev_fork fork; /* private */ -#if EV_CLEANUP_ENABLE ev_cleanup cleanup; /* unused */ -#endif } ev_embed; #endif @@ -501,29 +500,32 @@ union ev_any_watcher /* flag bits for ev_default_loop and ev_loop_new */ enum { /* the default */ - EVFLAG_AUTO = 0x00000000U, /* not quite a mask */ + EVFLAG_AUTO = 0x00000000U, /* not quite a mask */ /* flag bits */ - EVFLAG_NOENV = 0x01000000U, /* do NOT consult environment */ - EVFLAG_FORKCHECK = 0x02000000U, /* check for a fork in each iteration */ + EVFLAG_NOENV = 0x01000000U, /* do NOT consult environment */ + EVFLAG_FORKCHECK = 0x02000000U, /* check for a fork in each iteration */ /* debugging/feature disable */ - EVFLAG_NOINOTIFY = 0x00100000U, /* do not attempt to use inotify */ + EVFLAG_NOINOTIFY = 0x00100000U, /* do not attempt to use inotify */ #if EV_COMPAT3 - EVFLAG_NOSIGFD = 0, /* compatibility to pre-3.9 */ + EVFLAG_NOSIGFD = 0, /* compatibility to pre-3.9 */ #endif - EVFLAG_SIGNALFD = 0x00200000U, /* attempt to use signalfd */ - EVFLAG_NOSIGMASK = 0x00400000U /* avoid modifying the signal mask */ + EVFLAG_SIGNALFD = 0x00200000U, /* attempt to use signalfd */ + EVFLAG_NOSIGMASK = 0x00400000U, /* avoid modifying the signal mask */ + EVFLAG_NOTIMERFD = 0x00800000U /* avoid creating a timerfd */ }; /* method bits to be ored together */ enum { - EVBACKEND_SELECT = 0x00000001U, /* available just about anywhere */ - EVBACKEND_POLL = 0x00000002U, /* !win, !aix, broken on osx */ - EVBACKEND_EPOLL = 0x00000004U, /* linux */ - EVBACKEND_KQUEUE = 0x00000008U, /* bsd, broken on osx */ - EVBACKEND_DEVPOLL = 0x00000010U, /* solaris 8 */ /* NYI */ - EVBACKEND_PORT = 0x00000020U, /* solaris 10 */ - EVBACKEND_ALL = 0x0000003FU, /* all known backends */ - EVBACKEND_MASK = 0x0000FFFFU /* all future backends */ + EVBACKEND_SELECT = 0x00000001U, /* available just about anywhere */ + EVBACKEND_POLL = 0x00000002U, /* !win, !aix, broken on osx */ + EVBACKEND_EPOLL = 0x00000004U, /* linux */ + EVBACKEND_KQUEUE = 0x00000008U, /* bsd, broken on osx */ + EVBACKEND_DEVPOLL = 0x00000010U, /* solaris 8 */ /* NYI */ + EVBACKEND_PORT = 0x00000020U, /* solaris 10 */ + EVBACKEND_LINUXAIO = 0x00000040U, /* linux AIO, 4.19+ */ + EVBACKEND_IOURING = 0x00000080U, /* linux io_uring, 5.1+ */ + EVBACKEND_ALL = 
0x000000FFU, /* all known backends */ + EVBACKEND_MASK = 0x0000FFFFU /* all future backends */ }; #if EV_PROTOTYPES @@ -557,7 +559,6 @@ EV_API_DECL void ev_set_syserr_cb (void (*cb)(const char *msg) EV_NOEXCEPT) EV_N /* you can call this as often as you like */ EV_API_DECL struct ev_loop *ev_default_loop (unsigned int flags EV_CPP (= 0)) EV_NOEXCEPT; - /* create and destroy alternative loops that don't handle signals */ EV_API_DECL struct ev_loop *ev_loop_new (unsigned int flags EV_CPP (= 0)) EV_NOEXCEPT; @@ -643,6 +644,8 @@ EV_API_DECL int ev_active_cnt (EV_P) EV_NOEXCEPT; */ EV_API_DECL void ev_once (EV_P_ int fd, int events, ev_tstamp timeout, void (*cb)(int revents, void *arg), void *arg) EV_NOEXCEPT; +EV_API_DECL void ev_invoke_pending (EV_P); /* invoke all pending watchers */ + # if EV_FEATURE_API EV_API_DECL unsigned int ev_iteration (EV_P) EV_NOEXCEPT; /* number of loop iterations */ EV_API_DECL unsigned int ev_depth (EV_P) EV_NOEXCEPT; /* #ev_loop enters - #ev_loop leaves */ @@ -660,7 +663,6 @@ EV_API_DECL void ev_set_invoke_pending_cb (EV_P_ ev_loop_callback invoke_pending EV_API_DECL void ev_set_loop_release_cb (EV_P_ void (*release)(EV_P) EV_NOEXCEPT, void (*acquire)(EV_P) EV_NOEXCEPT) EV_NOEXCEPT; EV_API_DECL unsigned int ev_pending_count (EV_P) EV_NOEXCEPT; /* number of pending events, if any */ -EV_API_DECL void ev_invoke_pending (EV_P); /* invoke all pending watchers */ /* * stop/start the timer handling. @@ -680,6 +682,7 @@ EV_API_DECL void ev_resume (EV_P) EV_NOEXCEPT; ev_set_cb ((ev), cb_); \ } while (0) +#define ev_io_modify(ev,events_) do { (ev)->events = (ev)->events & EV__IOFDSET | (events_); } while (0) #define ev_io_set(ev,fd_,events_) do { (ev)->fd = (fd_); (ev)->events = (events_) | EV__IOFDSET; } while (0) #define ev_timer_set(ev,after_,repeat_) do { ((ev_watcher_time *)(ev))->at = (after_); (ev)->repeat = (repeat_); } while (0) #define ev_periodic_set(ev,ofs_,ival_,rcb_) do { (ev)->offset = (ofs_); (ev)->interval = (ival_); (ev)->reschedule_cb = (rcb_); } while (0) @@ -712,7 +715,6 @@ EV_API_DECL void ev_resume (EV_P) EV_NOEXCEPT; #define ev_is_active(ev) (0 + ((ev_watcher *)(void *)(ev))->active) /* ro, true when the watcher has been started */ #define ev_can_stop(ev) (ev_is_pending(ev) || ev_is_active(ev)) /* ro, true when the watcher has been started */ - #define ev_cb_(ev) (ev)->cb /* rw */ #define ev_cb(ev) (memmove (&ev_cb_ (ev), &((ev_watcher *)(ev))->cb, sizeof (ev_cb_ (ev))), (ev)->cb) @@ -727,6 +729,7 @@ EV_API_DECL void ev_resume (EV_P) EV_NOEXCEPT; #define ev_periodic_at(ev) (+((ev_watcher_time *)(ev))->at) #ifndef ev_set_cb +/* memmove is used here to avoid strict aliasing violations, and hopefully is optimized out by any reasonable compiler */ # define ev_set_cb(ev,cb_) (ev_cb_ (ev) = (cb_), memmove (&((ev_watcher *)(ev))->cb, &ev_cb_ (ev), sizeof (ev_cb_ (ev)))) #endif diff --git a/contrib/libev/ev_epoll.c b/contrib/libev/ev_epoll.c index 6aef86222..58cfa684d 100644 --- a/contrib/libev/ev_epoll.c +++ b/contrib/libev/ev_epoll.c @@ -1,7 +1,7 @@ /* * libev epoll fd activity backend * - * Copyright (c) 2007,2008,2009,2010,2011 Marc Alexander Lehmann <libev@schmorp.de> + * Copyright (c) 2007,2008,2009,2010,2011,2016,2017,2019 Marc Alexander Lehmann <libev@schmorp.de> * All rights reserved. * * Redistribution and use in source and binary forms, with or without modifica- @@ -93,10 +93,10 @@ epoll_modify (EV_P_ int fd, int oev, int nev) ev.events = (nev & EV_READ ? EPOLLIN : 0) | (nev & EV_WRITE ? 
EPOLLOUT : 0); - if (expect_true (!epoll_ctl (backend_fd, oev && oldmask != nev ? EPOLL_CTL_MOD : EPOLL_CTL_ADD, fd, &ev))) + if (ecb_expect_true (!epoll_ctl (backend_fd, oev && oldmask != nev ? EPOLL_CTL_MOD : EPOLL_CTL_ADD, fd, &ev))) return; - if (expect_true (errno == ENOENT)) + if (ecb_expect_true (errno == ENOENT)) { /* if ENOENT then the fd went away, so try to do the right thing */ if (!nev) @@ -105,7 +105,7 @@ epoll_modify (EV_P_ int fd, int oev, int nev) if (!epoll_ctl (backend_fd, EPOLL_CTL_ADD, fd, &ev)) return; } - else if (expect_true (errno == EEXIST)) + else if (ecb_expect_true (errno == EEXIST)) { /* EEXIST means we ignored a previous DEL, but the fd is still active */ /* if the kernel mask is the same as the new mask, we assume it hasn't changed */ @@ -115,7 +115,7 @@ epoll_modify (EV_P_ int fd, int oev, int nev) if (!epoll_ctl (backend_fd, EPOLL_CTL_MOD, fd, &ev)) return; } - else if (expect_true (errno == EPERM)) + else if (ecb_expect_true (errno == EPERM)) { /* EPERM means the fd is always ready, but epoll is too snobbish */ /* to handle it, unlike select or poll. */ @@ -124,12 +124,14 @@ epoll_modify (EV_P_ int fd, int oev, int nev) /* add fd to epoll_eperms, if not already inside */ if (!(oldmask & EV_EMASK_EPERM)) { - array_needsize (int, epoll_eperms, epoll_epermmax, epoll_epermcnt + 1, EMPTY2); + array_needsize (int, epoll_eperms, epoll_epermmax, epoll_epermcnt + 1, array_needsize_noinit); epoll_eperms [epoll_epermcnt++] = fd; } return; } + else + assert (("libev: I/O watcher with invalid fd found in epoll_ctl", errno != EBADF && errno != ELOOP && errno != EINVAL)); fd_kill (EV_A_ fd); @@ -144,16 +146,16 @@ epoll_poll (EV_P_ ev_tstamp timeout) int i; int eventcnt; - if (expect_false (epoll_epermcnt)) - timeout = 0.; + if (ecb_expect_false (epoll_epermcnt)) + timeout = EV_TS_CONST (0.); /* epoll wait times cannot be larger than (LONG_MAX - 999UL) / HZ msecs, which is below */ /* the default libev max wait time, however. */ EV_RELEASE_CB; - eventcnt = epoll_wait (backend_fd, epoll_events, epoll_eventmax, timeout * 1e3); + eventcnt = epoll_wait (backend_fd, epoll_events, epoll_eventmax, EV_TS_TO_MSEC (timeout)); EV_ACQUIRE_CB; - if (expect_false (eventcnt < 0)) + if (ecb_expect_false (eventcnt < 0)) { if (errno != EINTR) ev_syserr ("(libev) epoll_wait"); @@ -176,14 +178,14 @@ epoll_poll (EV_P_ ev_tstamp timeout) * other spurious notifications will be found by epoll_ctl, below * we assume that fd is always in range, as we never shrink the anfds array */ - if (expect_false ((uint32_t)anfds [fd].egen != (uint32_t)(ev->data.u64 >> 32))) + if (ecb_expect_false ((uint32_t)anfds [fd].egen != (uint32_t)(ev->data.u64 >> 32))) { /* recreate kernel state */ postfork |= 2; continue; } - if (expect_false (got & ~want)) + if (ecb_expect_false (got & ~want)) { anfds [fd].emask = want; @@ -195,6 +197,8 @@ epoll_poll (EV_P_ ev_tstamp timeout) * above with the gencounter check (== our fd is not the event fd), and * partially here, when epoll_ctl returns an error (== a child has the fd * but we closed it). + * note: for events such as POLLHUP, where we can't know whether it refers + * to EV_READ or EV_WRITE, we might issue redundant EPOLL_CTL_MOD calls. */ ev->events = (want & EV_READ ? EPOLLIN : 0) | (want & EV_WRITE ? 
EPOLLOUT : 0); @@ -212,7 +216,7 @@ epoll_poll (EV_P_ ev_tstamp timeout) } /* if the receive array was full, increase its size */ - if (expect_false (eventcnt == epoll_eventmax)) + if (ecb_expect_false (eventcnt == epoll_eventmax)) { ev_free (epoll_events); epoll_eventmax = array_nextsize (sizeof (struct epoll_event), epoll_eventmax, epoll_eventmax + 1); @@ -235,23 +239,34 @@ epoll_poll (EV_P_ ev_tstamp timeout) } } -inline_size -int -epoll_init (EV_P_ int flags) +static int +epoll_epoll_create (void) { + int fd; + #if defined EPOLL_CLOEXEC && !defined __ANDROID__ - backend_fd = epoll_create1 (EPOLL_CLOEXEC); + fd = epoll_create1 (EPOLL_CLOEXEC); - if (backend_fd < 0 && (errno == EINVAL || errno == ENOSYS)) + if (fd < 0 && (errno == EINVAL || errno == ENOSYS)) #endif - backend_fd = epoll_create (256); + { + fd = epoll_create (256); - if (backend_fd < 0) - return 0; + if (fd >= 0) + fcntl (fd, F_SETFD, FD_CLOEXEC); + } + + return fd; +} - fcntl (backend_fd, F_SETFD, FD_CLOEXEC); +inline_size +int +epoll_init (EV_P_ int flags) +{ + if ((backend_fd = epoll_epoll_create ()) < 0) + return 0; - backend_mintime = 1e-3; /* epoll does sometimes return early, this is just to avoid the worst */ + backend_mintime = EV_TS_CONST (1e-3); /* epoll does sometimes return early, this is just to avoid the worst */ backend_modify = epoll_modify; backend_poll = epoll_poll; @@ -269,17 +284,15 @@ epoll_destroy (EV_P) array_free (epoll_eperm, EMPTY); } -inline_size -void +ecb_cold +static void epoll_fork (EV_P) { close (backend_fd); - while ((backend_fd = epoll_create (256)) < 0) + while ((backend_fd = epoll_epoll_create ()) < 0) ev_syserr ("(libev) epoll_create"); - fcntl (backend_fd, F_SETFD, FD_CLOEXEC); - fd_rearm_all (EV_A); } diff --git a/contrib/libev/ev_iouring.c b/contrib/libev/ev_iouring.c new file mode 100644 index 000000000..bfd3de65f --- /dev/null +++ b/contrib/libev/ev_iouring.c @@ -0,0 +1,694 @@ +/* + * libev linux io_uring fd activity backend + * + * Copyright (c) 2019-2020 Marc Alexander Lehmann <libev@schmorp.de> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modifica- + * tion, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER- + * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO + * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE- + * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH- + * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Alternatively, the contents of this file may be used under the terms of + * the GNU General Public License ("GPL") version 2 or any later version, + * in which case the provisions of the GPL are applicable instead of + * the above. If you wish to allow the use of your version of this file + * only under the terms of the GPL and not to allow others to use your + * version of this file under the BSD license, indicate your decision + * by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL. If you do not delete the + * provisions above, a recipient may use your version of this file under + * either the BSD or the GPL. + */ + +/* + * general notes about linux io_uring: + * + * a) it's the best interface I have seen so far. on linux. + * b) best is not necessarily very good. + * c) it's better than the aio mess, doesn't suffer from the fork problems + * of linux aio or epoll and so on and so on. and you could do event stuff + * without any syscalls. what's not to like? + * d) ok, it's vastly more complex, but that's ok, really. + * e) why two mmaps instead of one? one would be more space-efficient, + * and I can't see what benefit two would have (other than being + * somehow resizable/relocatable, but that's apparently not possible). + * f) hmm, it's practically undebuggable (gdb can't access the memory, and + * the bizarre way structure offsets are communicated makes it hard to + * just print the ring buffer heads, even *iff* the memory were visible + * in gdb). but then, that's also ok, really. + * g) well, you cannot specify a timeout when waiting for events. no, + * seriously, the interface doesn't support a timeout. never seen _that_ + * before. sure, you can use a timerfd, but that's another syscall + * you could have avoided. overall, this bizarre omission smells + * like a µ-optimisation by the io_uring author for his personal + * applications, to the detriment of everybody else who just wants + * an event loop. but, umm, ok, if that's all, it could be worse. + * (from what I gather from the author Jens Axboe, it simply didn't + * occur to him, and he made good on it by adding an unlimited number + * of timeouts later :). + * h) initially there was a hardcoded limit of 4096 outstanding events. + * later versions not only bump this to 32k, but also can handle + * an unlimited amount of events, so this only affects the batch size. + * i) unlike linux aio, you *can* register more than the limit + * of fd events. while early versions of io_uring signalled an overflow + * and you ended up getting wet. 5.5+ does not do this anymore. + * j) but, oh my! it had exactly the same bugs as the linux aio backend, + * where some undocumented poll combinations just fail. fortunately, + * after finally reaching the author, he was more than willing to fix + * this probably in 5.6+. + * k) overall, the *API* itself is, I dare to say, not a total trainwreck. + * once the bugs are fixed (probably in 5.6+), it will be without + * competition. + */ + +/* TODO: use internal TIMEOUT */ +/* TODO: take advantage of single mmap, NODROP etc.
+ */ +/* TODO: resize cq/sq size independently */ + +#include <sys/timerfd.h> +#include <sys/mman.h> +#include <poll.h> +#include <stdint.h> + +#define IOURING_INIT_ENTRIES 32 + +/*****************************************************************************/ +/* syscall wrapdadoop - this section has the raw api/abi definitions */ + +#include <linux/fs.h> +#include <linux/types.h> + +/* mostly directly taken from the kernel or documentation */ + +struct io_uring_sqe +{ + __u8 opcode; + __u8 flags; + __u16 ioprio; + __s32 fd; + union { + __u64 off; + __u64 addr2; + }; + __u64 addr; + __u32 len; + union { + __kernel_rwf_t rw_flags; + __u32 fsync_flags; + __u16 poll_events; + __u32 sync_range_flags; + __u32 msg_flags; + __u32 timeout_flags; + __u32 accept_flags; + __u32 cancel_flags; + __u32 open_flags; + __u32 statx_flags; + }; + __u64 user_data; + union { + __u16 buf_index; + __u64 __pad2[3]; + }; +}; + +struct io_uring_cqe +{ + __u64 user_data; + __s32 res; + __u32 flags; +}; + +struct io_sqring_offsets +{ + __u32 head; + __u32 tail; + __u32 ring_mask; + __u32 ring_entries; + __u32 flags; + __u32 dropped; + __u32 array; + __u32 resv1; + __u64 resv2; +}; + +struct io_cqring_offsets +{ + __u32 head; + __u32 tail; + __u32 ring_mask; + __u32 ring_entries; + __u32 overflow; + __u32 cqes; + __u64 resv[2]; +}; + +struct io_uring_params +{ + __u32 sq_entries; + __u32 cq_entries; + __u32 flags; + __u32 sq_thread_cpu; + __u32 sq_thread_idle; + __u32 features; + __u32 resv[4]; + struct io_sqring_offsets sq_off; + struct io_cqring_offsets cq_off; +}; + +#define IORING_SETUP_CQSIZE 0x00000008 + +#define IORING_OP_POLL_ADD 6 +#define IORING_OP_POLL_REMOVE 7 +#define IORING_OP_TIMEOUT 11 +#define IORING_OP_TIMEOUT_REMOVE 12 + +/* relative or absolute, reference clock is CLOCK_MONOTONIC */ +struct iouring_kernel_timespec +{ + int64_t tv_sec; + long long tv_nsec; +}; + +#define IORING_TIMEOUT_ABS 0x00000001 + +#define IORING_ENTER_GETEVENTS 0x01 + +#define IORING_OFF_SQ_RING 0x00000000ULL +#define IORING_OFF_CQ_RING 0x08000000ULL +#define IORING_OFF_SQES 0x10000000ULL + +#define IORING_FEAT_SINGLE_MMAP 0x00000001 +#define IORING_FEAT_NODROP 0x00000002 +#define IORING_FEAT_SUBMIT_STABLE 0x00000004 + +inline_size +int +evsys_io_uring_setup (unsigned entries, struct io_uring_params *params) +{ + return ev_syscall2 (SYS_io_uring_setup, entries, params); +} + +inline_size +int +evsys_io_uring_enter (int fd, unsigned to_submit, unsigned min_complete, unsigned flags, const sigset_t *sig, size_t sigsz) +{ + return ev_syscall6 (SYS_io_uring_enter, fd, to_submit, min_complete, flags, sig, sigsz); +} + +/*****************************************************************************/ +/* actual backend implementation */ + +/* we hope that volatile will make the compiler access these variables only once */ +#define EV_SQ_VAR(name) *(volatile unsigned *)((char *)iouring_sq_ring + iouring_sq_ ## name) +#define EV_CQ_VAR(name) *(volatile unsigned *)((char *)iouring_cq_ring + iouring_cq_ ## name) + +/* the index array */ +#define EV_SQ_ARRAY ((unsigned *)((char *)iouring_sq_ring + iouring_sq_array)) + +/* the submit/completion queue entries */ +#define EV_SQES ((struct io_uring_sqe *) iouring_sqes) +#define EV_CQES ((struct io_uring_cqe *)((char *)iouring_cq_ring + iouring_cq_cqes)) + +inline_speed +int +iouring_enter (EV_P_ ev_tstamp timeout) +{ + int res; + + EV_RELEASE_CB; + + res = evsys_io_uring_enter (iouring_fd, iouring_to_submit, 1, + timeout > EV_TS_CONST (0.) ?
IORING_ENTER_GETEVENTS : 0, 0, 0); + + assert (("libev: io_uring_enter did not consume all sqes", (res < 0 || res == iouring_to_submit))); + + iouring_to_submit = 0; + + EV_ACQUIRE_CB; + + return res; +} + +/* TODO: can we move things around so we don't need this forward-reference? */ +static void +iouring_poll (EV_P_ ev_tstamp timeout); + +static +struct io_uring_sqe * +iouring_sqe_get (EV_P) +{ + unsigned tail; + + for (;;) + { + tail = EV_SQ_VAR (tail); + + if (ecb_expect_true (tail + 1 - EV_SQ_VAR (head) <= EV_SQ_VAR (ring_entries))) + break; /* what's the problem, we have free sqes */ + + /* queue full, need to flush and possibly handle some events */ + +#if EV_FEATURE_CODE + /* first we ask the kernel nicely, most often this frees up some sqes */ + int res = iouring_enter (EV_A_ EV_TS_CONST (0.)); + + ECB_MEMORY_FENCE_ACQUIRE; /* better safe than sorry */ + + if (res >= 0) + continue; /* yes, it worked, try again */ +#endif + + /* some problem, possibly EBUSY - do the full poll and let it handle any issues */ + + iouring_poll (EV_A_ EV_TS_CONST (0.)); + /* iouring_poll should have done ECB_MEMORY_FENCE_ACQUIRE for us */ + } + + /*assert (("libev: io_uring queue full after flush", tail + 1 - EV_SQ_VAR (head) <= EV_SQ_VAR (ring_entries)));*/ + + return EV_SQES + (tail & EV_SQ_VAR (ring_mask)); +} + +inline_size +void +iouring_sqe_submit (EV_P_ struct io_uring_sqe *sqe) +{ + unsigned idx = sqe - EV_SQES; + + EV_SQ_ARRAY [idx] = idx; + ECB_MEMORY_FENCE_RELEASE; + ++EV_SQ_VAR (tail); + /*ECB_MEMORY_FENCE_RELEASE; /* for the time being we assume this is not needed */ + ++iouring_to_submit; +} + +/*****************************************************************************/ + +/* when the timerfd expires we simply note the fact, + * as the purpose of the timerfd is to wake us up, nothing else. + * the next iteration should re-set it. + */ +static void +iouring_tfd_cb (EV_P_ struct ev_io *w, int revents) +{ + iouring_tfd_to = EV_TSTAMP_HUGE; +} + +/* called for full and partial cleanup */ +ecb_cold +static void +iouring_internal_destroy (EV_P) +{ + close (iouring_tfd); + close (iouring_fd); + + if (iouring_sq_ring != MAP_FAILED) munmap (iouring_sq_ring, iouring_sq_ring_size); + if (iouring_cq_ring != MAP_FAILED) munmap (iouring_cq_ring, iouring_cq_ring_size); + if (iouring_sqes != MAP_FAILED) munmap (iouring_sqes , iouring_sqes_size ); + + if (ev_is_active (&iouring_tfd_w)) + { + ev_ref (EV_A); + ev_io_stop (EV_A_ &iouring_tfd_w); + } +} + +ecb_cold +static int +iouring_internal_init (EV_P) +{ + struct io_uring_params params = { 0 }; + + iouring_to_submit = 0; + + iouring_tfd = -1; + iouring_sq_ring = MAP_FAILED; + iouring_cq_ring = MAP_FAILED; + iouring_sqes = MAP_FAILED; + + if (!have_monotonic) /* cannot really happen, but what if? */ + return -1; + + for (;;) + { + iouring_fd = evsys_io_uring_setup (iouring_entries, &params); + + if (iouring_fd >= 0) + break; /* yippie */ + + if (errno != EINVAL) + return -1; /* we failed */ + +#if TODO + if ((~params.features) & (IORING_FEAT_NODROP | IORING_FEAT_SINGLE_MMAP | IORING_FEAT_SUBMIT_STABLE)) + return -1; /* we require the above features */ +#endif + + /* EINVAL: lots of possible reasons, but maybe + * it is because we hit the unqueryable hardcoded size limit + */ + + /* we hit the limit already, give up */ + if (iouring_max_entries) + return -1; + + /* first time we hit EINVAL?
assume we hit the limit, so go back and retry */ + iouring_entries >>= 1; + iouring_max_entries = iouring_entries; + } + + iouring_sq_ring_size = params.sq_off.array + params.sq_entries * sizeof (unsigned); + iouring_cq_ring_size = params.cq_off.cqes + params.cq_entries * sizeof (struct io_uring_cqe); + iouring_sqes_size = params.sq_entries * sizeof (struct io_uring_sqe); + + iouring_sq_ring = mmap (0, iouring_sq_ring_size, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, iouring_fd, IORING_OFF_SQ_RING); + iouring_cq_ring = mmap (0, iouring_cq_ring_size, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, iouring_fd, IORING_OFF_CQ_RING); + iouring_sqes = mmap (0, iouring_sqes_size, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, iouring_fd, IORING_OFF_SQES); + + if (iouring_sq_ring == MAP_FAILED || iouring_cq_ring == MAP_FAILED || iouring_sqes == MAP_FAILED) + return -1; + + iouring_sq_head = params.sq_off.head; + iouring_sq_tail = params.sq_off.tail; + iouring_sq_ring_mask = params.sq_off.ring_mask; + iouring_sq_ring_entries = params.sq_off.ring_entries; + iouring_sq_flags = params.sq_off.flags; + iouring_sq_dropped = params.sq_off.dropped; + iouring_sq_array = params.sq_off.array; + + iouring_cq_head = params.cq_off.head; + iouring_cq_tail = params.cq_off.tail; + iouring_cq_ring_mask = params.cq_off.ring_mask; + iouring_cq_ring_entries = params.cq_off.ring_entries; + iouring_cq_overflow = params.cq_off.overflow; + iouring_cq_cqes = params.cq_off.cqes; + + iouring_tfd = timerfd_create (CLOCK_MONOTONIC, TFD_CLOEXEC); + + if (iouring_tfd < 0) + return iouring_tfd; + + iouring_tfd_to = EV_TSTAMP_HUGE; + + return 0; +} + +ecb_cold +static void +iouring_fork (EV_P) +{ + iouring_internal_destroy (EV_A); + + while (iouring_internal_init (EV_A) < 0) + ev_syserr ("(libev) io_uring_setup"); + + fd_rearm_all (EV_A); + + ev_io_stop (EV_A_ &iouring_tfd_w); + ev_io_set (EV_A_ &iouring_tfd_w, iouring_tfd, EV_READ); + ev_io_start (EV_A_ &iouring_tfd_w); +} + +/*****************************************************************************/ + +static void +iouring_modify (EV_P_ int fd, int oev, int nev) +{ + if (oev) + { + /* we assume the sqe's are all "properly" initialised */ + struct io_uring_sqe *sqe = iouring_sqe_get (EV_A); + sqe->opcode = IORING_OP_POLL_REMOVE; + sqe->fd = fd; + /* Jens Axboe notified me that user_data is not what is documented, but is + * some kind of unique ID that has to match, otherwise the request cannot + * be removed. Since we don't *really* have that, we pass in the old + * generation counter - if that fails, too bad, it will hopefully be removed + * at close time and then be ignored. */ + sqe->addr = (uint32_t)fd | ((__u64)(uint32_t)anfds [fd].egen << 32); + sqe->user_data = (uint64_t)-1; + iouring_sqe_submit (EV_A_ sqe); + + /* increment generation counter to avoid handling old events */ + ++anfds [fd].egen; + } + + if (nev) + { + struct io_uring_sqe *sqe = iouring_sqe_get (EV_A); + sqe->opcode = IORING_OP_POLL_ADD; + sqe->fd = fd; + sqe->addr = 0; + sqe->user_data = (uint32_t)fd | ((__u64)(uint32_t)anfds [fd].egen << 32); + sqe->poll_events = + (nev & EV_READ ? POLLIN : 0) + | (nev & EV_WRITE ? POLLOUT : 0); + iouring_sqe_submit (EV_A_ sqe); + } +} + +inline_size +void +iouring_tfd_update (EV_P_ ev_tstamp timeout) +{ + ev_tstamp tfd_to = mn_now + timeout; + + /* we assume there will be many iterations per timer change, so + * we only re-set the timerfd when we have to because its expiry + * is too late. 
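+ */

iouring_tfd_update above re-arms a CLOCK_MONOTONIC timerfd with an absolute expiry so the loop still wakes up even though io_uring (at the time) offered no wait timeout; EV_TS_SET, used just below, essentially splits libev's double timestamp into tv_sec/tv_nsec. A standalone sketch of the same pattern (error handling elided; wake_at is a hypothetical absolute monotonic deadline in seconds, not a name from the patch):

#include <sys/timerfd.h>

int tfd = timerfd_create (CLOCK_MONOTONIC, TFD_CLOEXEC);

struct itimerspec its = { 0 }; /* zero it_interval makes the timer one-shot */
its.it_value.tv_sec = (time_t)wake_at;
its.it_value.tv_nsec = (long)((wake_at - its.it_value.tv_sec) * 1e9);

timerfd_settime (tfd, TFD_TIMER_ABSTIME, &its, 0); /* absolute deadline on CLOCK_MONOTONIC */
/* tfd becomes readable once the deadline passes; it is then polled like any other fd */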
+ if (ecb_expect_false (tfd_to < iouring_tfd_to)) + { + struct itimerspec its; + + iouring_tfd_to = tfd_to; + EV_TS_SET (its.it_interval, 0.); + EV_TS_SET (its.it_value, tfd_to); + + if (timerfd_settime (iouring_tfd, TFD_TIMER_ABSTIME, &its, 0) < 0) + assert (("libev: iouring timerfd_settime failed", 0)); + } +} + +inline_size +void +iouring_process_cqe (EV_P_ struct io_uring_cqe *cqe) +{ + int fd = cqe->user_data & 0xffffffffU; + uint32_t gen = cqe->user_data >> 32; + int res = cqe->res; + + /* user_data -1 is a remove that we are not atm. interested in */ + if (cqe->user_data == (uint64_t)-1) + return; + + assert (("libev: io_uring fd must be in-bounds", fd >= 0 && fd < anfdmax)); + + /* documentation lies, of course. the result value is NOT like + * normal syscalls, but like linux raw syscalls, i.e. negative + * error numbers. fortunate, as otherwise there would be no way + * to get error codes at all. still, why not document this? + */ + + /* ignore event if generation doesn't match */ + /* other than skipping removal events, */ + /* this should actually be very rare */ + if (ecb_expect_false (gen != (uint32_t)anfds [fd].egen)) + return; + + if (ecb_expect_false (res < 0)) + { + /*TODO: EINVAL handling (was something failed with this fd)*/ + + if (res == -EBADF) + { + assert (("libev: event loop rejected bad fd", res != -EBADF)); + fd_kill (EV_A_ fd); + } + else + { + errno = -res; + ev_syserr ("(libev) IORING_OP_POLL_ADD"); + } + + return; + } + + /* feed events, we do not expect or handle POLLNVAL */ + fd_event ( + EV_A_ + fd, + (res & (POLLOUT | POLLERR | POLLHUP) ? EV_WRITE : 0) + | (res & (POLLIN | POLLERR | POLLHUP) ? EV_READ : 0) + ); + + /* io_uring is oneshot, so we need to re-arm the fd next iteration */ + /* this also means we usually have to do at least one syscall per iteration */ + anfds [fd].events = 0; + fd_change (EV_A_ fd, EV_ANFD_REIFY); +} + +/* called when the event queue overflows */ +ecb_cold +static void +iouring_overflow (EV_P) +{ + /* we have two options, resize the queue (by tearing down + * everything and recreating it), or living with it + * and polling. + * we implement this by resizing the queue, and, if that fails, + * we just recreate the state on every failure, which + * kind of is a very inefficient poll. + * one danger is, due to the bias toward lower fds, + * we will only really get events for those, so + * maybe we need a poll() fallback, after all. + */ + /*EV_CQ_VAR (overflow) = 0;*/ /* need to do this if we keep the state and poll manually */ + + fd_rearm_all (EV_A); + + /* we double the size until we hit the hard-to-probe maximum */ + if (!iouring_max_entries) + { + iouring_entries <<= 1; + iouring_fork (EV_A); + } + else + { + /* we hit the kernel limit, we should fall back to something else. + * we can either poll() a few times and hope for the best, + * poll always, or switch to epoll. + * TODO: is this necessary with newer kernels? + */ + + iouring_internal_destroy (EV_A); + + /* this should make it so that on return, we don't call any uring functions */ + iouring_to_submit = 0; + + for (;;) + { + backend = epoll_init (EV_A_ 0); + + if (backend) + break; + + ev_syserr ("(libev) iouring switch to epoll"); + } + } +} + +/* handle any events in the completion queue, return true if there were any */ +static int +iouring_handle_cq (EV_P) +{ + unsigned head, tail, mask; + + head = EV_CQ_VAR (head); + ECB_MEMORY_FENCE_ACQUIRE; + tail = EV_CQ_VAR (tail); + + if (head == tail) + return 0;
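Both new backends smuggle the fd and a per-fd generation counter into the kernel's opaque 64-bit completion tag (user_data here, aio_data in the linuxaio backend), so completions for a closed-and-reused fd can be recognised as stale and dropped, as iouring_process_cqe above does. A minimal sketch of the packing (egen is the per-ANFD generation counter from the patch; the helper names are made up for illustration):

#include <stdint.h>

static uint64_t pack_user_data (int fd, uint32_t egen)
{
 return (uint32_t)fd | ((uint64_t)egen << 32); /* low 32 bits: fd, high 32 bits: generation */
}

static int is_stale (uint64_t user_data, uint32_t current_egen)
{
 return (uint32_t)(user_data >> 32) != current_egen; /* completion from an old registration */
}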
+ + /* it can only overflow if we have events, yes, yes? */ + if (ecb_expect_false (EV_CQ_VAR (overflow))) + { + iouring_overflow (EV_A); + return 1; + } + + mask = EV_CQ_VAR (ring_mask); + + do + iouring_process_cqe (EV_A_ &EV_CQES [head++ & mask]); + while (head != tail); + + EV_CQ_VAR (head) = head; + ECB_MEMORY_FENCE_RELEASE; + + return 1; +} + +static void +iouring_poll (EV_P_ ev_tstamp timeout) +{ + /* if we have events, no need for extra syscalls, but we might have to queue events */ + /* we also clear the timeout if there are outstanding fdchanges */ + /* the latter should only happen if both the sq and cq are full, most likely */ + /* because we have a lot of event sources that immediately complete */ + /* TODO: fdchangecnt is always 0 because fd_reify does not have two buffers yet */ + if (iouring_handle_cq (EV_A) || fdchangecnt) + timeout = EV_TS_CONST (0.); + else + /* no events, so maybe wait for some */ + iouring_tfd_update (EV_A_ timeout); + + /* only enter the kernel if we have something to submit, or we need to wait */ + if (timeout || iouring_to_submit) + { + int res = iouring_enter (EV_A_ timeout); + + if (ecb_expect_false (res < 0)) + if (errno == EINTR) + /* ignore */; + else if (errno == EBUSY) + /* cq full, cannot submit - should be rare because we flush the cq first, so simply ignore */; + else + ev_syserr ("(libev) iouring setup"); + else + iouring_handle_cq (EV_A); + } +} + +inline_size +int +iouring_init (EV_P_ int flags) +{ + iouring_entries = IOURING_INIT_ENTRIES; + iouring_max_entries = 0; + + if (iouring_internal_init (EV_A) < 0) + { + iouring_internal_destroy (EV_A); + return 0; + } + + ev_io_init (&iouring_tfd_w, iouring_tfd_cb, iouring_tfd, EV_READ); + ev_set_priority (&iouring_tfd_w, EV_MINPRI); + ev_io_start (EV_A_ &iouring_tfd_w); + ev_unref (EV_A); /* watcher should not keep loop alive */ + + backend_modify = iouring_modify; + backend_poll = iouring_poll; + + return EVBACKEND_IOURING; +} + +inline_size +void +iouring_destroy (EV_P) +{ + iouring_internal_destroy (EV_A); +} + diff --git a/contrib/libev/ev_kqueue.c b/contrib/libev/ev_kqueue.c index 0c05ab9e7..69c5147f1 100644 --- a/contrib/libev/ev_kqueue.c +++ b/contrib/libev/ev_kqueue.c @@ -1,7 +1,7 @@ /* * libev kqueue backend * - * Copyright (c) 2007,2008,2009,2010,2011,2012,2013 Marc Alexander Lehmann <libev@schmorp.de> + * Copyright (c) 2007,2008,2009,2010,2011,2012,2013,2016,2019 Marc Alexander Lehmann <libev@schmorp.de> * All rights reserved.
* * Redistribution and use in source and binary forms, with or without modifica- @@ -48,7 +48,7 @@ void kqueue_change (EV_P_ int fd, int filter, int flags, int fflags) { ++kqueue_changecnt; - array_needsize (struct kevent, kqueue_changes, kqueue_changemax, kqueue_changecnt, EMPTY2); + array_needsize (struct kevent, kqueue_changes, kqueue_changemax, kqueue_changecnt, array_needsize_noinit); EV_SET (&kqueue_changes [kqueue_changecnt - 1], fd, filter, flags, fflags, 0, 0); } @@ -103,10 +103,10 @@ kqueue_poll (EV_P_ ev_tstamp timeout) EV_ACQUIRE_CB; kqueue_changecnt = 0; - if (expect_false (res < 0)) + if (ecb_expect_false (res < 0)) { if (errno != EINTR) - ev_syserr ("(libev) kevent"); + ev_syserr ("(libev) kqueue kevent"); return; } @@ -115,7 +115,7 @@ kqueue_poll (EV_P_ ev_tstamp timeout) { int fd = kqueue_events [i].ident; - if (expect_false (kqueue_events [i].flags & EV_ERROR)) + if (ecb_expect_false (kqueue_events [i].flags & EV_ERROR)) { int err = kqueue_events [i].data; @@ -129,10 +129,16 @@ kqueue_poll (EV_P_ ev_tstamp timeout) if (fd_valid (fd)) kqueue_modify (EV_A_ fd, 0, anfds [fd].events); else - fd_kill (EV_A_ fd); + { + assert (("libev: kqueue found invalid fd", 0)); + fd_kill (EV_A_ fd); + } } else /* on all other errors, we error out on the fd */ - fd_kill (EV_A_ fd); + { + assert (("libev: kqueue found invalid fd", 0)); + fd_kill (EV_A_ fd); + } } } else @@ -145,7 +151,7 @@ kqueue_poll (EV_P_ ev_tstamp timeout) ); } - if (expect_false (res == kqueue_eventmax)) + if (ecb_expect_false (res == kqueue_eventmax)) { ev_free (kqueue_events); kqueue_eventmax = array_nextsize (sizeof (struct kevent), kqueue_eventmax, kqueue_eventmax + 1); @@ -164,7 +170,7 @@ kqueue_init (EV_P_ int flags) fcntl (backend_fd, F_SETFD, FD_CLOEXEC); /* not sure if necessary, hopefully doesn't hurt */ - backend_mintime = 1e-9; /* apparently, they did the right thing in freebsd */ + backend_mintime = EV_TS_CONST (1e-9); /* apparently, they did the right thing in freebsd */ backend_modify = kqueue_modify; backend_poll = kqueue_poll; diff --git a/contrib/libev/ev_linuxaio.c b/contrib/libev/ev_linuxaio.c new file mode 100644 index 000000000..4687a703e --- /dev/null +++ b/contrib/libev/ev_linuxaio.c @@ -0,0 +1,620 @@ +/* + * libev linux aio fd activity backend + * + * Copyright (c) 2019 Marc Alexander Lehmann <libev@schmorp.de> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modifica- + * tion, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER- + * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO + * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE- + * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH- + * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Alternatively, the contents of this file may be used under the terms of + * the GNU General Public License ("GPL") version 2 or any later version, + * in which case the provisions of the GPL are applicable instead of + * the above. If you wish to allow the use of your version of this file + * only under the terms of the GPL and not to allow others to use your + * version of this file under the BSD license, indicate your decision + * by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL. If you do not delete the + * provisions above, a recipient may use your version of this file under + * either the BSD or the GPL. + */ + +/* + * general notes about linux aio: + * + * a) at first, the linux aio IOCB_CMD_POLL functionality introduced in + * 4.18 looks too good to be true: both watchers and events can be + * batched, and events can even be handled in userspace using + * a ring buffer shared with the kernel. watchers can be canceled + * regardless of whether the fd has been closed. no problems with fork. + * ok, the ring buffer is 200% undocumented (there isn't even a + * header file), but otherwise, it's pure bliss! + * b) ok, watchers are one-shot, so you have to re-arm active ones + * on every iteration. so much for syscall-less event handling, + * but at least these re-arms can be batched, no big deal, right? + * c) well, linux as usual: the documentation lies to you: io_submit + * sometimes returns EINVAL because the kernel doesn't feel like + * handling your poll mask - ttys can be polled for POLLOUT, + * POLLOUT|POLLIN, but polling for POLLIN fails. just great, + * so we have to fall back to something else (hello, epoll), + * but at least the fallback can be slow, because these are + * exceptional cases, right? + * d) hmm, you have to tell the kernel the maximum number of watchers + * you want to queue when initialising the aio context. but of + * course the real limit is magically calculated in the kernel, and + * is often higher than we asked for. so we just have to destroy + * the aio context and re-create it a bit larger if we hit the limit. + * (starts to remind you of epoll? well, it's a bit more deterministic + * and less gambling, but still ugly as hell). + * e) that's when you find out you can also hit an arbitrary system-wide + * limit. or the kernel simply doesn't want to handle your watchers. + * what the fuck do we do then? you guessed it, in the middle + * of event handling we have to switch to 100% epoll polling. and + * that had better be as fast as normal epoll polling, so you practically + * have to use the normal epoll backend with all its quirks. + * f) end result of this train wreck: it inherits all the disadvantages + * from epoll, while adding a number on its own. why even bother to use + * it?
because if conditions are right and your fds are supported and you + * don't hit a limit, this backend is actually faster, doesn't gamble with + * your fds, batches watchers and events and doesn't require costly state + * recreates. well, until it does. + * g) all of this makes this backend use almost twice as much code as epoll. + * which in turn uses twice as much code as poll. and that's not counting + * the fact that this backend also depends on the epoll backend, making + * it three times as much code as poll, or kqueue. + * h) bleah. why can't linux just do kqueue. sure kqueue is ugly, but by now + * it's clear that whatever linux comes up with is far, far, far worse. + */ + +#include <sys/time.h> /* actually linux/time.h, but we must assume they are compatible */ +#include <poll.h> +#include <linux/aio_abi.h> + +/*****************************************************************************/ +/* syscall wrapdadoop - this section has the raw api/abi definitions */ + +#include <sys/syscall.h> /* no glibc wrappers */ + +/* aio_abi.h is not versioned in any way, so we cannot test for its existence */ +#define IOCB_CMD_POLL 5 + +/* taken from linux/fs/aio.c. yup, that's a .c file. + * not only is this totally undocumented, not even the source code + * can tell you what the future semantics of compat_features and + * incompat_features are, or what header_length actually is for. + */ +#define AIO_RING_MAGIC 0xa10a10a1 +#define EV_AIO_RING_INCOMPAT_FEATURES 0 +struct aio_ring +{ + unsigned id; /* kernel internal index number */ + unsigned nr; /* number of io_events */ + unsigned head; /* Written to by userland or by kernel. */ + unsigned tail; + + unsigned magic; + unsigned compat_features; + unsigned incompat_features; + unsigned header_length; /* size of aio_ring */ + + struct io_event io_events[0]; +}; + +inline_size +int +evsys_io_setup (unsigned nr_events, aio_context_t *ctx_idp) +{ + return ev_syscall2 (SYS_io_setup, nr_events, ctx_idp); +} + +inline_size +int +evsys_io_destroy (aio_context_t ctx_id) +{ + return ev_syscall1 (SYS_io_destroy, ctx_id); +} + +inline_size +int +evsys_io_submit (aio_context_t ctx_id, long nr, struct iocb *cbp[]) +{ + return ev_syscall3 (SYS_io_submit, ctx_id, nr, cbp); +} + +inline_size +int +evsys_io_cancel (aio_context_t ctx_id, struct iocb *cbp, struct io_event *result) +{ + return ev_syscall3 (SYS_io_cancel, ctx_id, cbp, result); +} + +inline_size +int +evsys_io_getevents (aio_context_t ctx_id, long min_nr, long nr, struct io_event *events, struct timespec *timeout) +{ + return ev_syscall5 (SYS_io_getevents, ctx_id, min_nr, nr, events, timeout); +} + +/*****************************************************************************/ +/* actual backend implementation */ + +ecb_cold +static int +linuxaio_nr_events (EV_P) +{ + /* we start with 16 iocbs and increase from there + * that's tiny, but the kernel has a rather low system-wide + * limit that can be reached quickly, so let's be parsimonious + * with this resource. + * Rest assured, the kernel generously rounds up small and big numbers + * in different ways (but doesn't seem to charge you for it). + * The 15 here is because the kernel usually has a power of two as aio-max-nr, + * and this helps to take advantage of that limit. + */ + + /* we try to fill 4kB pages exactly. + * the ring buffer header is 32 bytes, every io event is 32 bytes. + * the kernel takes the io requests number, doubles it, adds 2 + * and adds the ring buffer.
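+ * the way we use this is by starting low, and then roughly doubling the + * size each time we hit a limit. + */

As a worked example of the sizing comment above, using the 32-byte io_event and 32-byte aio_ring header figures it states (illustrative arithmetic, not normative, and all divisions are C integer divisions):

/* one_page = (4096 / 32) / 2 = 64 events whose doubled count fills a full page */
/* first_page = ((4096 - 32) / 32 - 2) / 2 = 62 events, since the first page also holds the header and the +2 */
/* iteration 0: requests = 15 <= 62, so 15 is used exactly */
/* iteration 3: requests = 15 << 3 = 120 > 62, so 120 / 64 * 64 + 62 = 126 iocbs are requested */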
+
+/*****************************************************************************/
+/* actual backend implementation */
+
+ecb_cold
+static int
+linuxaio_nr_events (EV_P)
+{
+  /* we start with 15 iocbs and increase from there
+   * that's tiny, but the kernel has a rather low system-wide
+   * limit that can be reached quickly, so let's be parsimonious
+   * with this resource.
+   * Rest assured, the kernel generously rounds up small and big numbers
+   * in different ways (but doesn't seem to charge you for it).
+   * The 15 here is because the kernel usually has a power of two as aio-max-nr,
+   * and this helps to take advantage of that limit.
+   */
+
+  /* we try to fill 4kB pages exactly.
+   * the ring buffer header is 32 bytes, every io event is 32 bytes.
+   * the kernel takes the io requests number, doubles it, adds 2
+   * and adds the ring buffer.
+   * the way we use this is by starting low, and then roughly doubling the
+   * size each time we hit a limit.
+   */
+
+  int requests   = 15 << linuxaio_iteration;
+  int one_page   = (4096
+                   / sizeof (struct io_event)    ) / 2; /* how many fit into one page */
+  int first_page = ((4096 - sizeof (struct aio_ring))
+                   / sizeof (struct io_event) - 2) / 2; /* how many fit into the first page */
+
+  /* if everything fits into one page, use count exactly */
+  if (requests > first_page)
+    /* otherwise, round down to full pages and add the first page */
+    requests = requests / one_page * one_page + first_page;
+
+  return requests;
+}
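
With the usual numbers plugged in (32-byte io_event, 32-byte aio_ring header, 4096-byte pages), the sizing above works out like this, as a worked illustration only:

/* one_page   = (4096 / 32) / 2            = 64
 * first_page = ((4096 - 32) / 32 - 2) / 2 = 62
 * iteration 0: requests = 15  <= 62 -> 15 used as-is
 * iteration 2: requests = 60  <= 62 -> 60 used as-is
 * iteration 3: requests = 120 >  62 -> 120 / 64 * 64 + 62 = 126
 */
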
+
+/* we use our own wrapper structure in case we ever want to do something "clever" */
+typedef struct aniocb
+{
+  struct iocb io;
+  /*int inuse;*/
+} *ANIOCBP;
+
+inline_size
+void
+linuxaio_array_needsize_iocbp (ANIOCBP *base, int offset, int count)
+{
+  while (count--)
+    {
+      /* TODO: quite the overhead to allocate every iocb separately, maybe use our own allocator? */
+      ANIOCBP iocb = (ANIOCBP)ev_malloc (sizeof (*iocb));
+
+      /* full zero initialise is probably not required at the moment, but
+       * this is not well documented, so we better do it.
+       */
+      memset (iocb, 0, sizeof (*iocb));
+
+      iocb->io.aio_lio_opcode = IOCB_CMD_POLL;
+      iocb->io.aio_fildes = offset;
+
+      base [offset++] = iocb;
+    }
+}
+
+ecb_cold
+static void
+linuxaio_free_iocbp (EV_P)
+{
+  while (linuxaio_iocbpmax--)
+    ev_free (linuxaio_iocbps [linuxaio_iocbpmax]);
+
+  linuxaio_iocbpmax = 0; /* next resize will completely reallocate the array, at some overhead */
+}
+
+static void
+linuxaio_modify (EV_P_ int fd, int oev, int nev)
+{
+  array_needsize (ANIOCBP, linuxaio_iocbps, linuxaio_iocbpmax, fd + 1, linuxaio_array_needsize_iocbp);
+  ANIOCBP iocb = linuxaio_iocbps [fd];
+  ANFD *anfd = &anfds [fd];
+
+  if (ecb_expect_false (iocb->io.aio_reqprio < 0))
+    {
+      /* we handed this fd over to epoll, so undo this first */
+      /* we do it manually because the optimisations on epoll_modify won't do us any good */
+      epoll_ctl (backend_fd, EPOLL_CTL_DEL, fd, 0);
+      anfd->emask = 0;
+      iocb->io.aio_reqprio = 0;
+    }
+  else if (ecb_expect_false (iocb->io.aio_buf))
+    {
+      /* iocb active, so cancel it first before resubmit */
+      /* this assumes we only ever get one call per fd per loop iteration */
+      for (;;)
+        {
+          /* on all relevant kernels, io_cancel fails with EINPROGRESS on "success" */
+          if (ecb_expect_false (evsys_io_cancel (linuxaio_ctx, &iocb->io, (struct io_event *)0) == 0))
+            break;
+
+          if (ecb_expect_true (errno == EINPROGRESS))
+            break;
+
+          /* the EINPROGRESS test is for a nicer error message. clumsy. */
+          if (errno != EINTR)
+            {
+              assert (("libev: linuxaio unexpected io_cancel failed", errno != EINTR && errno != EINPROGRESS));
+              break;
+            }
+        }
+
+      /* increment generation counter to avoid handling old events */
+      ++anfd->egen;
+    }
+
+  iocb->io.aio_buf = (nev & EV_READ ? POLLIN : 0)
+                   | (nev & EV_WRITE ? POLLOUT : 0);
+
+  if (nev)
+    {
+      iocb->io.aio_data = (uint32_t)fd | ((__u64)(uint32_t)anfd->egen << 32);
+
+      /* queue iocb up for io_submit */
+      /* this assumes we only ever get one call per fd per loop iteration */
+      ++linuxaio_submitcnt;
+      array_needsize (struct iocb *, linuxaio_submits, linuxaio_submitmax, linuxaio_submitcnt, array_needsize_noinit);
+      linuxaio_submits [linuxaio_submitcnt - 1] = &iocb->io;
+    }
+}
+
+static void
+linuxaio_epoll_cb (EV_P_ struct ev_io *w, int revents)
+{
+  epoll_poll (EV_A_ 0);
+}
+
+inline_speed
+void
+linuxaio_fd_rearm (EV_P_ int fd)
+{
+  anfds [fd].events = 0;
+  linuxaio_iocbps [fd]->io.aio_buf = 0;
+  fd_change (EV_A_ fd, EV_ANFD_REIFY);
+}
+
+static void
+linuxaio_parse_events (EV_P_ struct io_event *ev, int nr)
+{
+  while (nr)
+    {
+      int fd       = ev->data & 0xffffffff;
+      uint32_t gen = ev->data >> 32;
+      int res      = ev->res;
+
+      assert (("libev: iocb fd must be in-bounds", fd >= 0 && fd < anfdmax));
+
+      /* only accept events if generation counter matches */
+      if (ecb_expect_true (gen == (uint32_t)anfds [fd].egen))
+        {
+          /* feed events, we do not expect or handle POLLNVAL */
+          fd_event (
+            EV_A_
+            fd,
+            (res & (POLLOUT | POLLERR | POLLHUP) ? EV_WRITE : 0)
+            | (res & (POLLIN | POLLERR | POLLHUP) ? EV_READ : 0)
+          );
+
+          /* linux aio is oneshot: rearm fd. TODO: this does more work than strictly needed */
+          linuxaio_fd_rearm (EV_A_ fd);
+        }
+
+      --nr;
+      ++ev;
+    }
+}
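
The aio_data packing above deserves spelling out, since it is what makes the cancellation scheme in linuxaio_modify safe: a completion from a cancelled-and-resubmitted iocb carries a stale generation and is silently dropped. A sketch of the invariant (aio_data_pack/aio_data_unpack are illustrative helpers, not libev functions):

/* low 32 bits carry the fd, high 32 bits the per-fd generation counter */
static __u64
aio_data_pack (int fd, uint32_t egen)
{
  return (uint32_t)fd | ((__u64)egen << 32);
}

static void
aio_data_unpack (__u64 data, int *fd, uint32_t *egen)
{
  *fd   = data & 0xffffffff;
  *egen = data >> 32;
}
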
+
+/* get any events from ring buffer, return true if any were handled */
+static int
+linuxaio_get_events_from_ring (EV_P)
+{
+  struct aio_ring *ring = (struct aio_ring *)linuxaio_ctx;
+  unsigned head, tail;
+
+  /* the kernel reads and writes both of these variables, */
+  /* as a C extension, we assume that volatile use here */
+  /* both makes reads atomic and once-only */
+  head = *(volatile unsigned *)&ring->head;
+  ECB_MEMORY_FENCE_ACQUIRE;
+  tail = *(volatile unsigned *)&ring->tail;
+
+  if (head == tail)
+    return 0;
+
+  /* parse all available events, but only once, to avoid starvation */
+  if (ecb_expect_true (tail > head)) /* normal case, not wrapped around */
+    linuxaio_parse_events (EV_A_ ring->io_events + head, tail - head);
+  else /* wrapped around */
+    {
+      linuxaio_parse_events (EV_A_ ring->io_events + head, ring->nr - head);
+      linuxaio_parse_events (EV_A_ ring->io_events, tail);
+    }
+
+  ECB_MEMORY_FENCE_RELEASE;
+  /* as an extension to C, we hope that the volatile will make this atomic and once-only */
+  *(volatile unsigned *)&ring->head = tail;
+
+  return 1;
+}
+
+inline_size
+int
+linuxaio_ringbuf_valid (EV_P)
+{
+  struct aio_ring *ring = (struct aio_ring *)linuxaio_ctx;
+
+  return ecb_expect_true (ring->magic == AIO_RING_MAGIC)
+      && ring->incompat_features == EV_AIO_RING_INCOMPAT_FEATURES
+      && ring->header_length == sizeof (struct aio_ring); /* TODO: or use it to find io_event[0]? */
+}
+
+/* read at least one event from kernel, or timeout */
+inline_size
+void
+linuxaio_get_events (EV_P_ ev_tstamp timeout)
+{
+  struct timespec ts;
+  struct io_event ioev[8]; /* 256 octet stack space */
+  int want = 1; /* how many events to request */
+  int ringbuf_valid = linuxaio_ringbuf_valid (EV_A);
+
+  if (ecb_expect_true (ringbuf_valid))
+    {
+      /* if the ring buffer has any events, we don't wait or call the kernel at all */
+      if (linuxaio_get_events_from_ring (EV_A))
+        return;
+
+      /* if the ring buffer is empty, and we don't have a timeout, then don't call the kernel */
+      if (!timeout)
+        return;
+    }
+  else
+    /* no ringbuffer, request slightly larger batch */
+    want = sizeof (ioev) / sizeof (ioev [0]);
+
+  /* no events, so wait for some
+   * for fairness reasons, we do this in a loop, to fetch all events
+   */
+  for (;;)
+    {
+      int res;
+
+      EV_RELEASE_CB;
+
+      EV_TS_SET (ts, timeout);
+      res = evsys_io_getevents (linuxaio_ctx, 1, want, ioev, &ts);
+
+      EV_ACQUIRE_CB;
+
+      if (res < 0)
+        if (errno == EINTR)
+          /* ignored, retry */;
+        else
+          ev_syserr ("(libev) linuxaio io_getevents");
+      else if (res)
+        {
+          /* at least one event available, handle them */
+          linuxaio_parse_events (EV_A_ ioev, res);
+
+          if (ecb_expect_true (ringbuf_valid))
+            {
+              /* if we have a ring buffer, handle any remaining events in it */
+              linuxaio_get_events_from_ring (EV_A);
+
+              /* at this point, we should have handled all outstanding events */
+              break;
+            }
+          else if (res < want)
+            /* otherwise, if there were fewer events than we wanted, we assume there are no more */
+            break;
+        }
+      else
+        break; /* no events from the kernel, we are done */
+
+      timeout = EV_TS_CONST (0.); /* only wait in the first iteration */
+    }
+}
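
The volatile-and-fence choreography in linuxaio_get_events_from_ring maps onto the C11 memory model as follows. This is a sketch of the same single-consumer protocol, not the libev implementation: consume_event is a hypothetical placeholder, and strictly speaking the casts would require the ring fields to be declared _Atomic, the same leap of faith the volatile version takes:

#include <stdatomic.h>

/* acquire-load the kernel-written tail, consume entries, then
 * release-store the new head so the kernel observes consumption in order */
unsigned head = *(volatile unsigned *)&ring->head;
unsigned tail = atomic_load_explicit ((_Atomic unsigned *)&ring->tail,
                                      memory_order_acquire);

while (head != tail)
  {
    consume_event (&ring->io_events [head]);
    head = head + 1 == ring->nr ? 0 : head + 1;
  }

atomic_store_explicit ((_Atomic unsigned *)&ring->head, head,
                       memory_order_release);
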
+
+inline_size
+int
+linuxaio_io_setup (EV_P)
+{
+  linuxaio_ctx = 0;
+  return evsys_io_setup (linuxaio_nr_events (EV_A), &linuxaio_ctx);
+}
+
+static void
+linuxaio_poll (EV_P_ ev_tstamp timeout)
+{
+  int submitted;
+
+  /* first phase: submit new iocbs */
+
+  /* io_submit might return less than the requested number of iocbs */
+  /* this is, afaics, only because of errors, but we go by the book and use a loop, */
+  /* which allows us to pinpoint the erroneous iocb */
+  for (submitted = 0; submitted < linuxaio_submitcnt; )
+    {
+      int res = evsys_io_submit (linuxaio_ctx, linuxaio_submitcnt - submitted, linuxaio_submits + submitted);
+
+      if (ecb_expect_false (res < 0))
+        if (errno == EINVAL)
+          {
+            /* This happens for unsupported fds, officially, but in my testing,
+             * also randomly happens for supported fds. We fall back to good old
+             * poll() here, under the assumption that this is a very rare case.
+             * See https://lore.kernel.org/patchwork/patch/1047453/ to see
+             * discussion about such a case (ttys) where polling for POLLIN
+             * fails but POLLIN|POLLOUT works.
+             */
+            struct iocb *iocb = linuxaio_submits [submitted];
+            epoll_modify (EV_A_ iocb->aio_fildes, 0, anfds [iocb->aio_fildes].events);
+            iocb->aio_reqprio = -1; /* mark iocb as epoll */
+
+            res = 1; /* skip this iocb - another iocb, another chance */
+          }
+        else if (errno == EAGAIN)
+          {
+            /* This happens when the ring buffer is full, or some other shit we
+             * don't know and isn't documented. Most likely because we have too
+             * many requests and linux aio can't be assed to handle them.
+             * In this case, we try to allocate a larger ring buffer, freeing
+             * ours first. This might fail, in which case we have to fall back to 100%
+             * epoll.
+             * God, how I hate linux not getting its act together. Ever.
+             */
+            evsys_io_destroy (linuxaio_ctx);
+            linuxaio_submitcnt = 0;
+
+            /* rearm all fds with active iocbs */
+            {
+              int fd;
+              for (fd = 0; fd < linuxaio_iocbpmax; ++fd)
+                if (linuxaio_iocbps [fd]->io.aio_buf)
+                  linuxaio_fd_rearm (EV_A_ fd);
+            }
+
+            ++linuxaio_iteration;
+            if (linuxaio_io_setup (EV_A) < 0)
+              {
+                /* TODO: rearming all fds and recreating the epoll backend from scratch */
+                /* TODO: might be more prudent? */
+
+                /* too bad, we can't get a new aio context, go 100% epoll */
+                linuxaio_free_iocbp (EV_A);
+                ev_io_stop (EV_A_ &linuxaio_epoll_w);
+                ev_ref (EV_A);
+                linuxaio_ctx = 0;
+
+                backend        = EVBACKEND_EPOLL;
+                backend_modify = epoll_modify;
+                backend_poll   = epoll_poll;
+              }
+
+            timeout = EV_TS_CONST (0.);
+            /* it's easiest to handle this mess in another iteration */
+            return;
+          }
+        else if (errno == EBADF)
+          {
+            assert (("libev: event loop rejected bad fd", errno != EBADF));
+            fd_kill (EV_A_ linuxaio_submits [submitted]->aio_fildes);
+
+            res = 1; /* skip this iocb */
+          }
+        else if (errno == EINTR) /* not seen in reality, not documented */
+          res = 0; /* silently ignore and retry */
+        else
+          {
+            ev_syserr ("(libev) linuxaio io_submit");
+            res = 0;
+          }
+
+      submitted += res;
+    }
+
+  linuxaio_submitcnt = 0;
+
+  /* second phase: fetch and parse events */
+
+  linuxaio_get_events (EV_A_ timeout);
+}
+
+inline_size
+int
+linuxaio_init (EV_P_ int flags)
+{
+  /* would be great to have a nice test for IOCB_CMD_POLL instead */
+  /* also: test some semi-common fd types, such as files and ttys in recommended_backends */
+  /* 4.18 introduced IOCB_CMD_POLL, 4.19 made epoll work, and we need that */
+  if (ev_linux_version () < 0x041300)
+    return 0;
+
+  if (!epoll_init (EV_A_ 0))
+    return 0;
+
+  linuxaio_iteration = 0;
+
+  if (linuxaio_io_setup (EV_A) < 0)
+    {
+      epoll_destroy (EV_A);
+      return 0;
+    }
+
+  ev_io_init (&linuxaio_epoll_w, linuxaio_epoll_cb, backend_fd, EV_READ);
+  ev_set_priority (&linuxaio_epoll_w, EV_MAXPRI);
+  ev_io_start (EV_A_ &linuxaio_epoll_w);
+  ev_unref (EV_A); /* watcher should not keep loop alive */
+
+  backend_modify = linuxaio_modify;
+  backend_poll   = linuxaio_poll;
+
+  linuxaio_iocbpmax = 0;
+  linuxaio_iocbps   = 0;
+
+  linuxaio_submits   = 0;
+  linuxaio_submitmax = 0;
+  linuxaio_submitcnt = 0;
+
+  return EVBACKEND_LINUXAIO;
+}
+
+inline_size
+void
+linuxaio_destroy (EV_P)
+{
+  epoll_destroy (EV_A);
+  linuxaio_free_iocbp (EV_A);
+  evsys_io_destroy (linuxaio_ctx); /* fails in child, aio context is destroyed */
+}
+
+ecb_cold
+static void
+linuxaio_fork (EV_P)
+{
+  linuxaio_submitcnt = 0; /* all pointers were invalidated */
+  linuxaio_free_iocbp (EV_A); /* this frees all iocbs, which is very heavy-handed */
+  evsys_io_destroy (linuxaio_ctx); /* fails in child, aio context is destroyed */
+
+  linuxaio_iteration = 0; /* we start over in the child */
+
+  while (linuxaio_io_setup (EV_A) < 0)
+    ev_syserr ("(libev) linuxaio io_setup");
+
+  /* forking epoll should also effectively unregister all fds from the backend */
+  epoll_fork (EV_A);
+  /* epoll_fork already did this. hopefully */
+  /*fd_rearm_all (EV_A);*/
+
+  ev_io_stop  (EV_A_ &linuxaio_epoll_w);
+  ev_io_set   (EV_A_ &linuxaio_epoll_w, backend_fd, EV_READ);
+  ev_io_start (EV_A_ &linuxaio_epoll_w);
+}
+
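Seen from the application side, none of this file is called directly; the backend is chosen through libev's public flags. A small usage sketch against the public API as exported by ev.h in this release (the explicit fallback is our choice for illustration, not something libev mandates):

#include <ev.h>
#include <stdio.h>

int
main (void)
{
  /* ask for the linuxaio backend explicitly; on kernels before 4.19,
   * or when system-wide aio limits bite, loop creation simply fails */
  struct ev_loop *loop = ev_loop_new (EVBACKEND_LINUXAIO);

  if (!loop)
    loop = ev_loop_new (EVFLAG_AUTO); /* let libev pick instead */

  if (!loop)
    return 1;

  printf ("backend in use: %#x\n", ev_backend (loop));

  ev_loop_destroy (loop);
  return 0;
}
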
diff --git a/contrib/libev/ev_poll.c b/contrib/libev/ev_poll.c
index bd742b07f..e5508ddb0 100644
--- a/contrib/libev/ev_poll.c
+++ b/contrib/libev/ev_poll.c
@@ -1,7 +1,7 @@
 /*
  * libev poll fd activity backend
  *
- * Copyright (c) 2007,2008,2009,2010,2011 Marc Alexander Lehmann <libev@schmorp.de>
+ * Copyright (c) 2007,2008,2009,2010,2011,2016,2019 Marc Alexander Lehmann <libev@schmorp.de>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without modifica-
@@ -41,10 +41,12 @@
 inline_size
 void
-pollidx_init (int *base, int count)
+array_needsize_pollidx (int *base, int offset, int count)
 {
-  /* consider using memset (.., -1, ...), which is practically guaranteed
-   * to work on all systems implementing poll */
+  /* using memset (.., -1, ...) is tempting, but we try
+   * to be ultraportable
+   */
+  base += offset;
   while (count--)
     *base++ = -1;
 }
@@ -57,14 +59,14 @@ poll_modify (EV_P_ int fd, int oev, int nev)
   if (oev == nev)
     return;
 
-  array_needsize (int, pollidxs, pollidxmax, fd + 1, pollidx_init);
+  array_needsize (int, pollidxs, pollidxmax, fd + 1, array_needsize_pollidx);
 
   idx = pollidxs [fd];
 
   if (idx < 0) /* need to allocate a new pollfd */
     {
       pollidxs [fd] = idx = pollcnt++;
-      array_needsize (struct pollfd, polls, pollmax, pollcnt, EMPTY2);
+      array_needsize (struct pollfd, polls, pollmax, pollcnt, array_needsize_noinit);
       polls [idx].fd = fd;
     }
 
@@ -78,7 +80,7 @@ poll_modify (EV_P_ int fd, int oev, int nev)
     {
       pollidxs [fd] = -1;
 
-      if (expect_true (idx < --pollcnt))
+      if (ecb_expect_true (idx < --pollcnt))
         {
           polls [idx] = polls [pollcnt];
           pollidxs [polls [idx].fd] = idx;
@@ -93,10 +95,10 @@ poll_poll (EV_P_ ev_tstamp timeout)
   int res;
 
   EV_RELEASE_CB;
-  res = poll (polls, pollcnt, timeout * 1e3);
+  res = poll (polls, pollcnt, EV_TS_TO_MSEC (timeout));
   EV_ACQUIRE_CB;
 
-  if (expect_false (res < 0))
+  if (ecb_expect_false (res < 0))
     {
       if (errno == EBADF)
         fd_ebadf (EV_A);
@@ -108,14 +110,17 @@
   else
     for (p = polls; res; ++p)
       {
-        assert (("libev: poll() returned illegal result, broken BSD kernel?", p < polls + pollcnt));
+        assert (("libev: poll returned illegal result, broken BSD kernel?", p < polls + pollcnt));
 
-        if (expect_false (p->revents)) /* this expect is debatable */
+        if (ecb_expect_false (p->revents)) /* this expect is debatable */
          {
            --res;
 
-           if (expect_false (p->revents & POLLNVAL))
-             fd_kill (EV_A_ p->fd);
+           if (ecb_expect_false (p->revents & POLLNVAL))
+             {
+               assert (("libev: poll found invalid fd in poll set", 0));
+               fd_kill (EV_A_ p->fd);
+             }
           else
             fd_event (
              EV_A_
@@ -131,7 +136,7 @@
 inline_size
 int
 poll_init (EV_P_ int flags)
 {
-  backend_mintime = 1e-3;
+  backend_mintime = EV_TS_CONST (1e-3);
   backend_modify = poll_modify;
   backend_poll = poll_poll;
 
diff --git a/contrib/libev/ev_port.c b/contrib/libev/ev_port.c
index c7b0b70c1..f4cd9d99c 100644
--- a/contrib/libev/ev_port.c
+++ b/contrib/libev/ev_port.c
@@ -1,7 +1,7 @@
 /*
  * libev solaris event port backend
  *
- * Copyright (c) 2007,2008,2009,2010,2011 Marc Alexander Lehmann <libev@schmorp.de>
+ * Copyright (c) 2007,2008,2009,2010,2011,2019 Marc Alexander Lehmann <libev@schmorp.de>
  * All rights reserved.
* * Redistribution and use in source and binary forms, with or without modifica- @@ -69,7 +69,10 @@ port_associate_and_check (EV_P_ int fd, int ev) ) { if (errno == EBADFD) - fd_kill (EV_A_ fd); + { + assert (("libev: port_associate found invalid fd", errno != EBADFD)); + fd_kill (EV_A_ fd); + } else ev_syserr ("(libev) port_associate"); } @@ -129,7 +132,7 @@ port_poll (EV_P_ ev_tstamp timeout) } } - if (expect_false (nget == port_eventmax)) + if (ecb_expect_false (nget == port_eventmax)) { ev_free (port_events); port_eventmax = array_nextsize (sizeof (port_event_t), port_eventmax, port_eventmax + 1); @@ -151,11 +154,11 @@ port_init (EV_P_ int flags) /* if my reading of the opensolaris kernel sources are correct, then * opensolaris does something very stupid: it checks if the time has already - * elapsed and doesn't round up if that is the case,m otherwise it DOES round + * elapsed and doesn't round up if that is the case, otherwise it DOES round * up. Since we can't know what the case is, we need to guess by using a * "large enough" timeout. Normally, 1e-9 would be correct. */ - backend_mintime = 1e-3; /* needed to compensate for port_getn returning early */ + backend_mintime = EV_TS_CONST (1e-3); /* needed to compensate for port_getn returning early */ backend_modify = port_modify; backend_poll = port_poll; diff --git a/contrib/libev/ev_select.c b/contrib/libev/ev_select.c index ed1fc7ad9..b862c8113 100644 --- a/contrib/libev/ev_select.c +++ b/contrib/libev/ev_select.c @@ -108,7 +108,7 @@ select_modify (EV_P_ int fd, int oev, int nev) int word = fd / NFDBITS; fd_mask mask = 1UL << (fd % NFDBITS); - if (expect_false (vec_max <= word)) + if (ecb_expect_false (vec_max <= word)) { int new_max = word + 1; @@ -171,7 +171,7 @@ select_poll (EV_P_ ev_tstamp timeout) #endif EV_ACQUIRE_CB; - if (expect_false (res < 0)) + if (ecb_expect_false (res < 0)) { #if EV_SELECT_IS_WINSOCKET errno = WSAGetLastError (); @@ -197,7 +197,7 @@ select_poll (EV_P_ ev_tstamp timeout) { if (timeout) { - unsigned long ms = timeout * 1e3; + unsigned long ms = EV_TS_TO_MSEC (timeout); Sleep (ms ? ms : 1); } @@ -236,7 +236,7 @@ select_poll (EV_P_ ev_tstamp timeout) if (FD_ISSET (handle, (fd_set *)vec_eo)) events |= EV_WRITE; #endif - if (expect_true (events)) + if (ecb_expect_true (events)) fd_event (EV_A_ fd, events); } } @@ -262,7 +262,7 @@ select_poll (EV_P_ ev_tstamp timeout) events |= word_r & mask ? EV_READ : 0; events |= word_w & mask ? EV_WRITE : 0; - if (expect_true (events)) + if (ecb_expect_true (events)) fd_event (EV_A_ word * NFDBITS + bit, events); } } @@ -275,7 +275,7 @@ inline_size int select_init (EV_P_ int flags) { - backend_mintime = 1e-6; + backend_mintime = EV_TS_CONST (1e-6); backend_modify = select_modify; backend_poll = select_poll; diff --git a/contrib/libev/ev_vars.h b/contrib/libev/ev_vars.h index 30e9e285d..fb0c58316 100644 --- a/contrib/libev/ev_vars.h +++ b/contrib/libev/ev_vars.h @@ -1,7 +1,7 @@ /* * loop member variable declarations * - * Copyright (c) 2007,2008,2009,2010,2011,2012,2013 Marc Alexander Lehmann <libev@schmorp.de> + * Copyright (c) 2007,2008,2009,2010,2011,2012,2013,2019 Marc Alexander Lehmann <libev@schmorp.de> * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without modifica- @@ -107,6 +107,46 @@ VARx(int, epoll_epermcnt) VARx(int, epoll_epermmax) #endif +#if EV_USE_LINUXAIO || EV_GENWRAP +VARx(aio_context_t, linuxaio_ctx) +VARx(int, linuxaio_iteration) +VARx(struct aniocb **, linuxaio_iocbps) +VARx(int, linuxaio_iocbpmax) +VARx(struct iocb **, linuxaio_submits) +VARx(int, linuxaio_submitcnt) +VARx(int, linuxaio_submitmax) +VARx(ev_io, linuxaio_epoll_w) +#endif + +#if EV_USE_IOURING || EV_GENWRAP +VARx(int, iouring_fd) +VARx(unsigned, iouring_to_submit); +VARx(int, iouring_entries) +VARx(int, iouring_max_entries) +VARx(void *, iouring_sq_ring) +VARx(void *, iouring_cq_ring) +VARx(void *, iouring_sqes) +VARx(uint32_t, iouring_sq_ring_size) +VARx(uint32_t, iouring_cq_ring_size) +VARx(uint32_t, iouring_sqes_size) +VARx(uint32_t, iouring_sq_head) +VARx(uint32_t, iouring_sq_tail) +VARx(uint32_t, iouring_sq_ring_mask) +VARx(uint32_t, iouring_sq_ring_entries) +VARx(uint32_t, iouring_sq_flags) +VARx(uint32_t, iouring_sq_dropped) +VARx(uint32_t, iouring_sq_array) +VARx(uint32_t, iouring_cq_head) +VARx(uint32_t, iouring_cq_tail) +VARx(uint32_t, iouring_cq_ring_mask) +VARx(uint32_t, iouring_cq_ring_entries) +VARx(uint32_t, iouring_cq_overflow) +VARx(uint32_t, iouring_cq_cqes) +VARx(ev_tstamp, iouring_tfd_to) +VARx(int, iouring_tfd) +VARx(ev_io, iouring_tfd_w) +#endif + #if EV_USE_KQUEUE || EV_GENWRAP VARx(pid_t, kqueue_fd_pid) VARx(struct kevent *, kqueue_changes) @@ -187,6 +227,11 @@ VARx(ev_io, sigfd_w) VARx(sigset_t, sigfd_set) #endif +#if EV_USE_TIMERFD || EV_GENWRAP +VARx(int, timerfd) /* timerfd for time jump detection */ +VARx(ev_io, timerfd_w) +#endif + VARx(unsigned int, origflags) /* original loop flags */ #if EV_FEATURE_API || EV_GENWRAP diff --git a/contrib/libev/ev_win32.c b/contrib/libev/ev_win32.c index fd671356a..97344c3e1 100644 --- a/contrib/libev/ev_win32.c +++ b/contrib/libev/ev_win32.c @@ -154,8 +154,8 @@ ev_time (void) ui.u.LowPart = ft.dwLowDateTime; ui.u.HighPart = ft.dwHighDateTime; - /* msvc cannot convert ulonglong to double... yes, it is that sucky */ - return (LONGLONG)(ui.QuadPart - 116444736000000000) * 1e-7; + /* also, msvc cannot convert ulonglong to double... 
yes, it is that sucky */ + return EV_TS_FROM_USEC (((LONGLONG)(ui.QuadPart - 116444736000000000) * 1e-1)); } #endif diff --git a/contrib/libev/ev_wrap.h b/contrib/libev/ev_wrap.h index ad989ea7d..45d793ced 100644 --- a/contrib/libev/ev_wrap.h +++ b/contrib/libev/ev_wrap.h @@ -44,12 +44,46 @@ #define invoke_cb ((loop)->invoke_cb) #define io_blocktime ((loop)->io_blocktime) #define iocp ((loop)->iocp) +#define iouring_cq_cqes ((loop)->iouring_cq_cqes) +#define iouring_cq_head ((loop)->iouring_cq_head) +#define iouring_cq_overflow ((loop)->iouring_cq_overflow) +#define iouring_cq_ring ((loop)->iouring_cq_ring) +#define iouring_cq_ring_entries ((loop)->iouring_cq_ring_entries) +#define iouring_cq_ring_mask ((loop)->iouring_cq_ring_mask) +#define iouring_cq_ring_size ((loop)->iouring_cq_ring_size) +#define iouring_cq_tail ((loop)->iouring_cq_tail) +#define iouring_entries ((loop)->iouring_entries) +#define iouring_fd ((loop)->iouring_fd) +#define iouring_max_entries ((loop)->iouring_max_entries) +#define iouring_sq_array ((loop)->iouring_sq_array) +#define iouring_sq_dropped ((loop)->iouring_sq_dropped) +#define iouring_sq_flags ((loop)->iouring_sq_flags) +#define iouring_sq_head ((loop)->iouring_sq_head) +#define iouring_sq_ring ((loop)->iouring_sq_ring) +#define iouring_sq_ring_entries ((loop)->iouring_sq_ring_entries) +#define iouring_sq_ring_mask ((loop)->iouring_sq_ring_mask) +#define iouring_sq_ring_size ((loop)->iouring_sq_ring_size) +#define iouring_sq_tail ((loop)->iouring_sq_tail) +#define iouring_sqes ((loop)->iouring_sqes) +#define iouring_sqes_size ((loop)->iouring_sqes_size) +#define iouring_tfd ((loop)->iouring_tfd) +#define iouring_tfd_to ((loop)->iouring_tfd_to) +#define iouring_tfd_w ((loop)->iouring_tfd_w) +#define iouring_to_submit ((loop)->iouring_to_submit) #define kqueue_changecnt ((loop)->kqueue_changecnt) #define kqueue_changemax ((loop)->kqueue_changemax) #define kqueue_changes ((loop)->kqueue_changes) #define kqueue_eventmax ((loop)->kqueue_eventmax) #define kqueue_events ((loop)->kqueue_events) #define kqueue_fd_pid ((loop)->kqueue_fd_pid) +#define linuxaio_ctx ((loop)->linuxaio_ctx) +#define linuxaio_epoll_w ((loop)->linuxaio_epoll_w) +#define linuxaio_iocbpmax ((loop)->linuxaio_iocbpmax) +#define linuxaio_iocbps ((loop)->linuxaio_iocbps) +#define linuxaio_iteration ((loop)->linuxaio_iteration) +#define linuxaio_submitcnt ((loop)->linuxaio_submitcnt) +#define linuxaio_submitmax ((loop)->linuxaio_submitmax) +#define linuxaio_submits ((loop)->linuxaio_submits) #define loop_count ((loop)->loop_count) #define loop_depth ((loop)->loop_depth) #define loop_done ((loop)->loop_done) @@ -89,6 +123,8 @@ #define sigfd_w ((loop)->sigfd_w) #define timeout_blocktime ((loop)->timeout_blocktime) #define timercnt ((loop)->timercnt) +#define timerfd ((loop)->timerfd) +#define timerfd_w ((loop)->timerfd_w) #define timermax ((loop)->timermax) #define timers ((loop)->timers) #define userdata ((loop)->userdata) @@ -143,12 +179,46 @@ #undef invoke_cb #undef io_blocktime #undef iocp +#undef iouring_cq_cqes +#undef iouring_cq_head +#undef iouring_cq_overflow +#undef iouring_cq_ring +#undef iouring_cq_ring_entries +#undef iouring_cq_ring_mask +#undef iouring_cq_ring_size +#undef iouring_cq_tail +#undef iouring_entries +#undef iouring_fd +#undef iouring_max_entries +#undef iouring_sq_array +#undef iouring_sq_dropped +#undef iouring_sq_flags +#undef iouring_sq_head +#undef iouring_sq_ring +#undef iouring_sq_ring_entries +#undef iouring_sq_ring_mask +#undef iouring_sq_ring_size +#undef 
iouring_sq_tail
+#undef iouring_sqes
+#undef iouring_sqes_size
+#undef iouring_tfd
+#undef iouring_tfd_to
+#undef iouring_tfd_w
+#undef iouring_to_submit
 #undef kqueue_changecnt
 #undef kqueue_changemax
 #undef kqueue_changes
 #undef kqueue_eventmax
 #undef kqueue_events
 #undef kqueue_fd_pid
+#undef linuxaio_ctx
+#undef linuxaio_epoll_w
+#undef linuxaio_iocbpmax
+#undef linuxaio_iocbps
+#undef linuxaio_iteration
+#undef linuxaio_submitcnt
+#undef linuxaio_submitmax
+#undef linuxaio_submits
 #undef loop_count
 #undef loop_depth
 #undef loop_done
@@ -188,6 +258,8 @@
 #undef sigfd_w
 #undef timeout_blocktime
 #undef timercnt
+#undef timerfd
+#undef timerfd_w
 #undef timermax
 #undef timers
 #undef userdata
diff --git a/src/libserver/cfg_utils.c b/src/libserver/cfg_utils.c
index 7a5da9807..7e04adb97 100644
--- a/src/libserver/cfg_utils.c
+++ b/src/libserver/cfg_utils.c
@@ -2631,6 +2631,14 @@ rspamd_config_ev_backend_to_string (int ev_backend, gboolean *effective)
 	}
 	if (ev_backend & EVBACKEND_EPOLL) {
+		if (ev_backend & EVBACKEND_IOURING) {
+			SET_EFFECTIVE (TRUE);
+			return "epoll+io_uring";
+		}
+		if (ev_backend & EVBACKEND_LINUXAIO) {
+			SET_EFFECTIVE (TRUE);
+			return "epoll+aio";
+		}
 		SET_EFFECTIVE (TRUE);
 		return "epoll";
 	}
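
The mapping reads more clearly in isolation. A hypothetical standalone restatement of the logic added above (backend_label is an illustrative name; the real function additionally records whether the configured backend is effective via its SET_EFFECTIVE macro):

#include <ev.h>

/* map a libev backend mask to the label rspamd reports; since the
 * io_uring and linuxaio backends embed epoll, those bits win */
static const char *
backend_label (int ev_backend)
{
	if (ev_backend & EVBACKEND_EPOLL) {
		if (ev_backend & EVBACKEND_IOURING) {
			return "epoll+io_uring";
		}
		if (ev_backend & EVBACKEND_LINUXAIO) {
			return "epoll+aio";
		}
		return "epoll";
	}

	return "unknown";
}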