"${CMAKE_SOURCE_DIR}/contrib/lua-lpeg" | "${CMAKE_SOURCE_DIR}/contrib/lua-lpeg" | ||||
"${CMAKE_SOURCE_DIR}/contrib/frozen/include" | "${CMAKE_SOURCE_DIR}/contrib/frozen/include" | ||||
"${CMAKE_SOURCE_DIR}/contrib/fu2/include" | "${CMAKE_SOURCE_DIR}/contrib/fu2/include" | ||||
"${CMAKE_SOURCE_DIR}/contrib/stringzilla/include" | |||||
"${CMAKE_BINARY_DIR}/src" #Stored in the binary dir | "${CMAKE_BINARY_DIR}/src" #Stored in the binary dir | ||||
"${CMAKE_BINARY_DIR}/src/libcryptobox") | "${CMAKE_BINARY_DIR}/src/libcryptobox") | ||||
LIST(APPEND RSPAMD_REQUIRED_LIBRARIES rspamd-replxx) | LIST(APPEND RSPAMD_REQUIRED_LIBRARIES rspamd-replxx) | ||||
ENDIF () | ENDIF () | ||||
ADD_SUBDIRECTORY(contrib/stringzilla) | |||||
LIST(APPEND RSPAMD_REQUIRED_LIBRARIES rspamd-stringzilla) | |||||
# Propagate to all targets, as we use those in the includes | |||||
FOREACH (DEFINITION ${SZ_DEFINITIONS}) | |||||
ADD_DEFINITIONS(${DEFINITION}) | |||||
ENDFOREACH () | |||||
IF (ENABLE_SNOWBALL MATCHES "ON") | IF (ENABLE_SNOWBALL MATCHES "ON") | ||||
LIST(APPEND RSPAMD_REQUIRED_LIBRARIES stemmer) | LIST(APPEND RSPAMD_REQUIRED_LIBRARIES stemmer) | ||||
ENDIF () | ENDIF () |
#include <stddef.h> | #include <stddef.h> | ||||
#pragma GCC push_options | #pragma GCC push_options | ||||
#pragma GCC target(\"avx2\") | #pragma GCC target(\"avx2\") | ||||
#pragma clang attribute push(__attribute__((target(\"avx2\")))) | |||||
#ifndef __SSE2__ | #ifndef __SSE2__ | ||||
#define __SSE2__ | #define __SSE2__ | ||||
#endif | #endif |
# Disable -Wsuggest-attribute=format: it is too noisy with FPs around fmt C++ library | # Disable -Wsuggest-attribute=format: it is too noisy with FPs around fmt C++ library | ||||
CHECK_C_COMPILER_FLAG(-Wsuggest-attribute SUPPORT_WSUGGEST_ATTRIBUTE) | CHECK_C_COMPILER_FLAG(-Wsuggest-attribute SUPPORT_WSUGGEST_ATTRIBUTE) | ||||
# Disable -Wunknown-pragmas: we have both clang and gcc pragmas | |||||
CHECK_C_COMPILER_FLAG(-Wunknown-pragmas SUPPORT_WUNKNOWN_PRAGMAS) | |||||
IF(SUPPORT_WEXTRA) | IF(SUPPORT_WEXTRA) | ||||
ADD_COMPILE_OPTIONS("-Wextra") | ADD_COMPILE_OPTIONS("-Wextra") | ||||
ENDIF(SUPPORT_WEXTRA) | ENDIF(SUPPORT_WEXTRA) | ||||
IF(SUPPORT_WDEPRECATED_DECLARATIONS) | IF(SUPPORT_WDEPRECATED_DECLARATIONS) | ||||
ADD_COMPILE_OPTIONS("-Wno-deprecated-declarations") | ADD_COMPILE_OPTIONS("-Wno-deprecated-declarations") | ||||
ENDIF() | ENDIF() | ||||
IF(SUPPORT_WUNKNOWN_PRAGMAS) | |||||
ADD_COMPILE_OPTIONS("-Wno-unknown-pragmas") | |||||
ENDIF() |
set(archdetect_c_code " | set(archdetect_c_code " | ||||
#if defined(__arm__) || defined(__TARGET_ARCH_ARM) | #if defined(__arm__) || defined(__TARGET_ARCH_ARM) | ||||
#if defined(__ARM_ARCH_7__) \\ | |||||
#if defined(__aarch64__) || defined(__ARM64__) || defined(_M_ARM64) | |||||
#error cmake_ARCH arm64 | |||||
#elif defined(__ARM_ARCH_7__) \\ | |||||
|| defined(__ARM_ARCH_7A__) \\ | || defined(__ARM_ARCH_7A__) \\ | ||||
|| defined(__ARM_ARCH_7R__) \\ | || defined(__ARM_ARCH_7R__) \\ | ||||
|| defined(__ARM_ARCH_7M__) \\ | || defined(__ARM_ARCH_7M__) \\ |
| ankerl/svector | 1.0.2 | MIT | NO | | | | ankerl/svector | 1.0.2 | MIT | NO | | | ||||
| ankerl/unordered_dense | 4.4.0 | MIT | NO | | | | ankerl/unordered_dense | 4.4.0 | MIT | NO | | | ||||
| backward-cpp | 1.6 | MIT | NO | | | | backward-cpp | 1.6 | MIT | NO | | | ||||
| stringzilla | 3.5.0 | Apache2 | NO | | | |||||
# Builds the bundled StringZilla dispatch library (lib.c) as a static archive.
SET(STRINGZILLASRC lib.c)

# Exported to the parent directory, which applies it to every target.
# NOTE(review): PARENT_SCOPE assigns the variable in the parent scope only, so
# the LIST(APPEND ...) calls below modify this directory's own SZ_DEFINITIONS
# and the parent never sees the architecture-specific flags. Confirm that
# exporting only -DSZ_DYNAMIC_DISPATCH=1 is the intended behaviour.
SET(SZ_DEFINITIONS "-DSZ_DYNAMIC_DISPATCH=1" PARENT_SCOPE)

# Select the SIMD backends compiled into lib.c for the detected architecture.
TARGET_ARCHITECTURE(ARCH)
IF ("${ARCH}" STREQUAL "x86_64")
    LIST(APPEND SZ_DEFINITIONS
            "-DSZ_USE_X86_AVX512=1"
            "-DSZ_USE_X86_AVX2=1"
            "-DSZ_USE_MISALIGNED_LOADS=1")
ELSEIF ("${ARCH}" STREQUAL "arm64")
    LIST(APPEND SZ_DEFINITIONS
            "-DSZ_USE_ARM_NEON=1"
            "-DSZ_USE_ARM_SVE=1"
            "-DSZ_USE_MISALIGNED_LOADS=1")
ENDIF ()

# Apply the locally collected definitions to targets declared in this directory.
FOREACH (SZ_FLAG ${SZ_DEFINITIONS})
    ADD_DEFINITIONS(${SZ_FLAG})
ENDFOREACH ()

ADD_LIBRARY(rspamd-stringzilla STATIC ${STRINGZILLASRC})
SET_TARGET_PROPERTIES(rspamd-stringzilla PROPERTIES VERSION ${RSPAMD_VERSION})
/** | |||||
* @file lib.c | |||||
 *  @brief  StringZilla C library with dynamic backend dispatch for the most appropriate implementation.
* @author Ash Vardanian | |||||
* @date January 16, 2024 | |||||
* @copyright Copyright (c) 2024 | |||||
*/ | |||||
#if defined(_WIN32) || defined(__CYGWIN__) | |||||
#include <windows.h> // `DllMain` | |||||
#endif | |||||
// Overwrite `SZ_DYNAMIC_DISPATCH` before including StringZilla. | |||||
#ifdef SZ_DYNAMIC_DISPATCH | |||||
#undef SZ_DYNAMIC_DISPATCH | |||||
#endif | |||||
#define SZ_DYNAMIC_DISPATCH 1 | |||||
#include <stringzilla/stringzilla.h> | |||||
#if SZ_AVOID_LIBC
// Without LibC the `malloc` definition in `stringzilla.h` would be ill-formed,
// so `size_t` and inert stand-ins for the referenced LibC symbols are provided here.
#ifndef _MSC_VER
typedef __SIZE_TYPE__ size_t; // For GCC/Clang
#else
typedef sz_size_t size_t; // Reuse the type definition inferred by `stringzilla.h`
#endif

// Stub allocator: never hands out memory.
void *malloc(size_t length) {
    sz_unused(length);
    return SZ_NULL;
}

// Stub deallocator: nothing to release.
void free(void *start) { sz_unused(start); }

// Stub random source: always yields zero.
int rand(void) { return 0; }

#endif
/**
 *  @brief  Detects at runtime which SIMD backends the current CPU advertises.
 *
 *  On x86 builds (`SZ_USE_X86_AVX2` / `SZ_USE_X86_AVX512`) the feature bits are read with `cpuid`.
 *  On Arm builds (`SZ_USE_ARM_NEON` / `SZ_USE_ARM_SVE`) NEON is always reported and SVE/SVE2 never are.
 *
 *  NOTE(review): only the CPU feature bits are inspected; nothing here verifies (e.g. via `xgetbv`)
 *  that the operating system has enabled the AVX / AVX-512 register state.
 *
 *  @return Bitwise OR of `sz_cap_*_k` flags; `sz_cap_serial_k` is always included.
 */
SZ_DYNAMIC sz_capability_t sz_capabilities(void) {

#if SZ_USE_X86_AVX512 || SZ_USE_X86_AVX2

    /// The states of 4 registers populated for a specific "cpuid" assembly call
    union four_registers_t {
        int array[4];
        struct separate_t {
            unsigned eax, ebx, ecx, edx;
        } named;
    } info0, info7;

    // Leaf 0 reports the highest supported basic leaf in EAX. Querying a leaf beyond it
    // returns unrelated data, so bail out to the serial backend when leaf 7 is unavailable.
#ifdef _MSC_VER
    __cpuidex(info0.array, 0, 0);
#else
    __asm__ __volatile__("cpuid"
                         : "=a"(info0.named.eax), "=b"(info0.named.ebx), "=c"(info0.named.ecx), "=d"(info0.named.edx)
                         : "a"(0), "c"(0));
#endif
    if (info0.named.eax < 7) return sz_cap_serial_k;

    // Leaf 7, sub-leaf 0: structured extended feature flags.
#ifdef _MSC_VER
    __cpuidex(info7.array, 7, 0);
#else
    __asm__ __volatile__("cpuid"
                         : "=a"(info7.named.eax), "=b"(info7.named.ebx), "=c"(info7.named.ecx), "=d"(info7.named.edx)
                         : "a"(7), "c"(0));
#endif

    // Check for AVX2 (Function ID 7, EBX register)
    // https://github.com/llvm/llvm-project/blob/50598f0ff44f3a4e75706f8c53f3380fe7faa896/clang/lib/Headers/cpuid.h#L148
    unsigned supports_avx2 = (info7.named.ebx & 0x00000020) != 0;
    // Check for AVX512F (Function ID 7, EBX register)
    // https://github.com/llvm/llvm-project/blob/50598f0ff44f3a4e75706f8c53f3380fe7faa896/clang/lib/Headers/cpuid.h#L155
    unsigned supports_avx512f = (info7.named.ebx & 0x00010000) != 0;
    // Check for AVX512BW (Function ID 7, EBX register)
    // https://github.com/llvm/llvm-project/blob/50598f0ff44f3a4e75706f8c53f3380fe7faa896/clang/lib/Headers/cpuid.h#L166
    unsigned supports_avx512bw = (info7.named.ebx & 0x40000000) != 0;
    // Check for AVX512VL (Function ID 7, EBX register)
    // https://github.com/llvm/llvm-project/blob/50598f0ff44f3a4e75706f8c53f3380fe7faa896/clang/lib/Headers/cpuid.h#L167C25-L167C35
    unsigned supports_avx512vl = (info7.named.ebx & 0x80000000) != 0;
    // Check for AVX512VBMI (Function ID 7, ECX register, bit 1)
    // https://github.com/llvm/llvm-project/blob/50598f0ff44f3a4e75706f8c53f3380fe7faa896/clang/lib/Headers/cpuid.h#L171C30-L171C40
    unsigned supports_avx512vbmi = (info7.named.ecx & 0x00000002) != 0;
    // Check for GFNI (Function ID 7, ECX register, bit 8)
    // https://github.com/llvm/llvm-project/blob/50598f0ff44f3a4e75706f8c53f3380fe7faa896/clang/lib/Headers/cpuid.h#L177C30-L177C40
    unsigned supports_gfni = (info7.named.ecx & 0x00000100) != 0;

    return (sz_capability_t)(                             //
        (sz_cap_x86_avx2_k * supports_avx2) |             //
        (sz_cap_x86_avx512f_k * supports_avx512f) |       //
        (sz_cap_x86_avx512vl_k * supports_avx512vl) |     //
        (sz_cap_x86_avx512bw_k * supports_avx512bw) |     //
        (sz_cap_x86_avx512vbmi_k * supports_avx512vbmi) | //
        (sz_cap_x86_gfni_k * (supports_gfni)) |           //
        (sz_cap_serial_k));

#endif // SZ_USE_X86_AVX512 || SZ_USE_X86_AVX2

#if SZ_USE_ARM_NEON || SZ_USE_ARM_SVE

    // Every 64-bit Arm CPU supports NEON
    unsigned supports_neon = 1;
    // SVE and SVE2 are not probed here, so they are never reported.
    unsigned supports_sve = 0;
    unsigned supports_sve2 = 0;
    sz_unused(supports_sve);
    sz_unused(supports_sve2);

    return (sz_capability_t)(                 //
        (sz_cap_arm_neon_k * supports_neon) | //
        (sz_cap_serial_k));

#endif // SZ_USE_ARM_NEON || SZ_USE_ARM_SVE

    // No SIMD backend was compiled in: only the portable implementation is available.
    return sz_cap_serial_k;
}
typedef struct sz_implementations_t { | |||||
sz_equal_t equal; | |||||
sz_order_t order; | |||||
sz_move_t copy; | |||||
sz_move_t move; | |||||
sz_fill_t fill; | |||||
sz_find_byte_t find_byte; | |||||
sz_find_byte_t rfind_byte; | |||||
sz_find_t find; | |||||
sz_find_t rfind; | |||||
sz_find_set_t find_from_set; | |||||
sz_find_set_t rfind_from_set; | |||||
sz_edit_distance_t edit_distance; | |||||
sz_alignment_score_t alignment_score; | |||||
sz_hashes_t hashes; | |||||
} sz_implementations_t; | |||||
static sz_implementations_t sz_dispatch_table; | |||||
/** | |||||
* @brief Initializes a global static "virtual table" of supported backends | |||||
* Run it just once to avoiding unnecessary `if`-s. | |||||
*/ | |||||
static void sz_dispatch_table_init(void) { | |||||
sz_implementations_t *impl = &sz_dispatch_table; | |||||
sz_capability_t caps = sz_capabilities(); | |||||
sz_unused(caps); //< Unused when compiling on pre-SIMD machines. | |||||
impl->equal = sz_equal_serial; | |||||
impl->order = sz_order_serial; | |||||
impl->copy = sz_copy_serial; | |||||
impl->move = sz_move_serial; | |||||
impl->fill = sz_fill_serial; | |||||
impl->find = sz_find_serial; | |||||
impl->rfind = sz_rfind_serial; | |||||
impl->find_byte = sz_find_byte_serial; | |||||
impl->rfind_byte = sz_rfind_byte_serial; | |||||
impl->find_from_set = sz_find_charset_serial; | |||||
impl->rfind_from_set = sz_rfind_charset_serial; | |||||
impl->edit_distance = sz_edit_distance_serial; | |||||
impl->alignment_score = sz_alignment_score_serial; | |||||
impl->hashes = sz_hashes_serial; | |||||
#if SZ_USE_X86_AVX2 | |||||
if (caps & sz_cap_x86_avx2_k) { | |||||
impl->copy = sz_copy_avx2; | |||||
impl->move = sz_move_avx2; | |||||
impl->fill = sz_fill_avx2; | |||||
impl->find_byte = sz_find_byte_avx2; | |||||
impl->rfind_byte = sz_rfind_byte_avx2; | |||||
impl->find = sz_find_avx2; | |||||
impl->rfind = sz_rfind_avx2; | |||||
} | |||||
#endif | |||||
#if SZ_USE_X86_AVX512 | |||||
if (caps & sz_cap_x86_avx512f_k) { | |||||
impl->equal = sz_equal_avx512; | |||||
impl->order = sz_order_avx512; | |||||
impl->copy = sz_copy_avx512; | |||||
impl->move = sz_move_avx512; | |||||
impl->fill = sz_fill_avx512; | |||||
impl->find = sz_find_avx512; | |||||
impl->rfind = sz_rfind_avx512; | |||||
impl->find_byte = sz_find_byte_avx512; | |||||
impl->rfind_byte = sz_rfind_byte_avx512; | |||||
impl->edit_distance = sz_edit_distance_avx512; | |||||
} | |||||
if ((caps & sz_cap_x86_avx512f_k) && (caps & sz_cap_x86_avx512vl_k) && (caps & sz_cap_x86_gfni_k) && | |||||
(caps & sz_cap_x86_avx512bw_k) && (caps & sz_cap_x86_avx512vbmi_k)) { | |||||
impl->find_from_set = sz_find_charset_avx512; | |||||
impl->rfind_from_set = sz_rfind_charset_avx512; | |||||
impl->alignment_score = sz_alignment_score_avx512; | |||||
} | |||||
#endif | |||||
#if SZ_USE_ARM_NEON | |||||
if (caps & sz_cap_arm_neon_k) { | |||||
impl->find = sz_find_neon; | |||||
impl->rfind = sz_rfind_neon; | |||||
impl->find_byte = sz_find_byte_neon; | |||||
impl->rfind_byte = sz_rfind_byte_neon; | |||||
impl->find_from_set = sz_find_charset_neon; | |||||
impl->rfind_from_set = sz_rfind_charset_neon; | |||||
} | |||||
#endif | |||||
} | |||||
#if defined(_MSC_VER)
/**
 *  @brief  DLL entry point: populates the dispatch table when the library is attached to a process.
 *          Thread attach/detach and process detach need no work.
 *
 *  NOTE(review): this hook only runs when the code is built as a DLL; confirm how the table
 *  gets initialized when this file is linked into a static library with MSVC.
 *
 *  @return Always `TRUE`, including for notification codes not handled explicitly.
 */
BOOL WINAPI DllMain(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpReserved) {
    sz_unused(hinstDLL);
    sz_unused(lpReserved);
    if (fdwReason == DLL_PROCESS_ATTACH) sz_dispatch_table_init();
    // Return on every path, so control never falls off the end of a non-void function.
    return TRUE;
}
#else
/* GCC/Clang: populate the dispatch table from a constructor that runs at load time. */
__attribute__((constructor)) static void sz_dispatch_table_init_on_gcc_or_clang(void) { sz_dispatch_table_init(); }
#endif
SZ_DYNAMIC sz_bool_t sz_equal(sz_cptr_t a, sz_cptr_t b, sz_size_t length) { | |||||
return sz_dispatch_table.equal(a, b, length); | |||||
} | |||||
SZ_DYNAMIC sz_ordering_t sz_order(sz_cptr_t a, sz_size_t a_length, sz_cptr_t b, sz_size_t b_length) { | |||||
return sz_dispatch_table.order(a, a_length, b, b_length); | |||||
} | |||||
SZ_DYNAMIC void sz_copy(sz_ptr_t target, sz_cptr_t source, sz_size_t length) { | |||||
sz_dispatch_table.copy(target, source, length); | |||||
} | |||||
SZ_DYNAMIC void sz_move(sz_ptr_t target, sz_cptr_t source, sz_size_t length) { | |||||
sz_dispatch_table.move(target, source, length); | |||||
} | |||||
SZ_DYNAMIC void sz_fill(sz_ptr_t target, sz_size_t length, sz_u8_t value) { | |||||
sz_dispatch_table.fill(target, length, value); | |||||
} | |||||
SZ_DYNAMIC sz_cptr_t sz_find_byte(sz_cptr_t haystack, sz_size_t h_length, sz_cptr_t needle) { | |||||
return sz_dispatch_table.find_byte(haystack, h_length, needle); | |||||
} | |||||
SZ_DYNAMIC sz_cptr_t sz_rfind_byte(sz_cptr_t haystack, sz_size_t h_length, sz_cptr_t needle) { | |||||
return sz_dispatch_table.rfind_byte(haystack, h_length, needle); | |||||
} | |||||
SZ_DYNAMIC sz_cptr_t sz_find(sz_cptr_t haystack, sz_size_t h_length, sz_cptr_t needle, sz_size_t n_length) { | |||||
return sz_dispatch_table.find(haystack, h_length, needle, n_length); | |||||
} | |||||
SZ_DYNAMIC sz_cptr_t sz_rfind(sz_cptr_t haystack, sz_size_t h_length, sz_cptr_t needle, sz_size_t n_length) { | |||||
return sz_dispatch_table.rfind(haystack, h_length, needle, n_length); | |||||
} | |||||
SZ_DYNAMIC sz_cptr_t sz_find_charset(sz_cptr_t text, sz_size_t length, sz_charset_t const *set) { | |||||
return sz_dispatch_table.find_from_set(text, length, set); | |||||
} | |||||
SZ_DYNAMIC sz_cptr_t sz_rfind_charset(sz_cptr_t text, sz_size_t length, sz_charset_t const *set) { | |||||
return sz_dispatch_table.rfind_from_set(text, length, set); | |||||
} | |||||
SZ_DYNAMIC sz_size_t sz_hamming_distance( // | |||||
sz_cptr_t a, sz_size_t a_length, // | |||||
sz_cptr_t b, sz_size_t b_length, // | |||||
sz_size_t bound) { | |||||
return sz_hamming_distance_serial(a, a_length, b, b_length, bound); | |||||
} | |||||
SZ_DYNAMIC sz_size_t sz_hamming_distance_utf8( // | |||||
sz_cptr_t a, sz_size_t a_length, // | |||||
sz_cptr_t b, sz_size_t b_length, // | |||||
sz_size_t bound) { | |||||
return sz_hamming_distance_utf8_serial(a, a_length, b, b_length, bound); | |||||
} | |||||
SZ_DYNAMIC sz_size_t sz_edit_distance( // | |||||
sz_cptr_t a, sz_size_t a_length, // | |||||
sz_cptr_t b, sz_size_t b_length, // | |||||
sz_size_t bound, sz_memory_allocator_t *alloc) { | |||||
return sz_dispatch_table.edit_distance(a, a_length, b, b_length, bound, alloc); | |||||
} | |||||
SZ_DYNAMIC sz_size_t sz_edit_distance_utf8( // | |||||
sz_cptr_t a, sz_size_t a_length, // | |||||
sz_cptr_t b, sz_size_t b_length, // | |||||
sz_size_t bound, sz_memory_allocator_t *alloc) { | |||||
return _sz_edit_distance_wagner_fisher_serial(a, a_length, b, b_length, bound, sz_true_k, alloc); | |||||
} | |||||
SZ_DYNAMIC sz_ssize_t sz_alignment_score(sz_cptr_t a, sz_size_t a_length, sz_cptr_t b, sz_size_t b_length, | |||||
sz_error_cost_t const *subs, sz_error_cost_t gap, | |||||
sz_memory_allocator_t *alloc) { | |||||
return sz_dispatch_table.alignment_score(a, a_length, b, b_length, subs, gap, alloc); | |||||
} | |||||
SZ_DYNAMIC void sz_hashes(sz_cptr_t text, sz_size_t length, sz_size_t window_length, sz_size_t step, // | |||||
sz_hash_callback_t callback, void *callback_handle) { | |||||
sz_dispatch_table.hashes(text, length, window_length, step, callback, callback_handle); | |||||
} | |||||
SZ_DYNAMIC sz_cptr_t sz_find_char_from(sz_cptr_t h, sz_size_t h_length, sz_cptr_t n, sz_size_t n_length) { | |||||
sz_charset_t set; | |||||
sz_charset_init(&set); | |||||
for (; n_length; ++n, --n_length) sz_charset_add(&set, *n); | |||||
return sz_find_charset(h, h_length, &set); | |||||
} | |||||
SZ_DYNAMIC sz_cptr_t sz_find_char_not_from(sz_cptr_t h, sz_size_t h_length, sz_cptr_t n, sz_size_t n_length) { | |||||
sz_charset_t set; | |||||
sz_charset_init(&set); | |||||
for (; n_length; ++n, --n_length) sz_charset_add(&set, *n); | |||||
sz_charset_invert(&set); | |||||
return sz_find_charset(h, h_length, &set); | |||||
} | |||||
SZ_DYNAMIC sz_cptr_t sz_rfind_char_from(sz_cptr_t h, sz_size_t h_length, sz_cptr_t n, sz_size_t n_length) { | |||||
sz_charset_t set; | |||||
sz_charset_init(&set); | |||||
for (; n_length; ++n, --n_length) sz_charset_add(&set, *n); | |||||
return sz_rfind_charset(h, h_length, &set); | |||||
} | |||||
SZ_DYNAMIC sz_cptr_t sz_rfind_char_not_from(sz_cptr_t h, sz_size_t h_length, sz_cptr_t n, sz_size_t n_length) { | |||||
sz_charset_t set; | |||||
sz_charset_init(&set); | |||||
for (; n_length; ++n, --n_length) sz_charset_add(&set, *n); | |||||
sz_charset_invert(&set); | |||||
return sz_rfind_charset(h, h_length, &set); | |||||
} | |||||
sz_u64_t _sz_random_generator(void *empty_state) { | |||||
sz_unused(empty_state); | |||||
return (sz_u64_t)rand(); | |||||
} | |||||
SZ_DYNAMIC void sz_generate(sz_cptr_t alphabet, sz_size_t alphabet_size, sz_ptr_t result, sz_size_t result_length, | |||||
sz_random_generator_t generator, void *generator_user_data) { | |||||
if (!generator) generator = _sz_random_generator; | |||||
sz_generate_serial(alphabet, alphabet_size, result, result_length, generator, generator_user_data); | |||||
} |
/* | /* | ||||
* Copyright 2023 Vsevolod Stakhov | |||||
* Copyright 2024 Vsevolod Stakhov | |||||
* | * | ||||
* Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
* you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
#include "config.h" | #include "config.h" | ||||
#ifdef WITH_HYPERSCAN | #ifdef WITH_HYPERSCAN | ||||
#include <string> | |||||
#include <filesystem> | #include <filesystem> | ||||
#include "contrib/ankerl/unordered_dense.h" | #include "contrib/ankerl/unordered_dense.h" | ||||
#include "contrib/ankerl/svector.h" | #include "contrib/ankerl/svector.h" | ||||
#include "fmt/core.h" | #include "fmt/core.h" | ||||
#include "libutil/cxx/string.hxx" | |||||
#include "libutil/cxx/file_util.hxx" | #include "libutil/cxx/file_util.hxx" | ||||
#include "libutil/cxx/error.hxx" | #include "libutil/cxx/error.hxx" | ||||
#include "hs.h" | #include "hs.h" | ||||
class hs_known_files_cache { | class hs_known_files_cache { | ||||
private: | private: | ||||
// These fields are filled when we add new known cache files | // These fields are filled when we add new known cache files | ||||
ankerl::svector<std::string, 4> cache_dirs; | |||||
ankerl::svector<std::string, 8> cache_extensions; | |||||
ankerl::unordered_dense::set<std::string> known_cached_files; | |||||
ankerl::svector<sz::string, 4> cache_dirs; | |||||
ankerl::svector<sz::string, 8> cache_extensions; | |||||
ankerl::unordered_dense::set<sz::string> known_cached_files; | |||||
bool loaded = false; | bool loaded = false; | ||||
private: | private: | ||||
return; | return; | ||||
} | } | ||||
auto dir = fpath.parent_path(); | |||||
auto ext = fpath.extension(); | |||||
auto dir = sz::string{fpath.parent_path().c_str()}; | |||||
auto ext = sz::string{fpath.extension().c_str()}; | |||||
if (std::find_if(cache_dirs.begin(), cache_dirs.end(), | if (std::find_if(cache_dirs.begin(), cache_dirs.end(), | ||||
[&](const auto &item) { return item == dir; }) == std::end(cache_dirs)) { | [&](const auto &item) { return item == dir; }) == std::end(cache_dirs)) { | ||||
cache_dirs.emplace_back(std::string{dir}); | |||||
cache_dirs.emplace_back(sz::string{dir}); | |||||
} | } | ||||
if (std::find_if(cache_extensions.begin(), cache_extensions.end(), | if (std::find_if(cache_extensions.begin(), cache_extensions.end(), | ||||
[&](const auto &item) { return item == ext; }) == std::end(cache_extensions)) { | [&](const auto &item) { return item == ext; }) == std::end(cache_extensions)) { | ||||
cache_extensions.emplace_back(std::string{ext}); | |||||
cache_extensions.emplace_back(sz::string{ext}); | |||||
} | } | ||||
auto is_known = known_cached_files.insert(fpath.string()); | auto is_known = known_cached_files.insert(fpath.string()); | ||||
return; | return; | ||||
} | } | ||||
auto dir = fpath.parent_path(); | |||||
auto ext = fpath.extension(); | |||||
auto dir = sz::string{fpath.parent_path().c_str()}; | |||||
auto ext = sz::string{fpath.extension().c_str()}; | |||||
if (std::find_if(cache_dirs.begin(), cache_dirs.end(), | if (std::find_if(cache_dirs.begin(), cache_dirs.end(), | ||||
[&](const auto &item) { return item == dir; }) == std::end(cache_dirs)) { | [&](const auto &item) { return item == dir; }) == std::end(cache_dirs)) { | ||||
cache_dirs.emplace_back(dir.string()); | |||||
cache_dirs.emplace_back(dir); | |||||
} | } | ||||
if (std::find_if(cache_extensions.begin(), cache_extensions.end(), | if (std::find_if(cache_extensions.begin(), cache_extensions.end(), | ||||
[&](const auto &item) { return item == ext; }) == std::end(cache_extensions)) { | [&](const auto &item) { return item == ext; }) == std::end(cache_extensions)) { | ||||
cache_extensions.emplace_back(ext.string()); | |||||
cache_extensions.emplace_back(ext); | |||||
} | } | ||||
auto is_known = known_cached_files.insert(fpath.string()); | auto is_known = known_cached_files.insert(fpath.string()); | ||||
/* We clean dir merely if we are running from the main process */ | /* We clean dir merely if we are running from the main process */ | ||||
if (rspamd_current_worker == nullptr && env_cleanup_disable == nullptr && loaded) { | if (rspamd_current_worker == nullptr && env_cleanup_disable == nullptr && loaded) { | ||||
const auto *log_func = RSPAMD_LOG_FUNC; | const auto *log_func = RSPAMD_LOG_FUNC; | ||||
auto cleanup_dir = [&](std::string_view dir) -> void { | |||||
auto cleanup_dir = [&](sz::string_view dir) -> void { | |||||
for (const auto &ext: cache_extensions) { | for (const auto &ext: cache_extensions) { | ||||
glob_t globbuf; | glob_t globbuf; | ||||
if (glob(glob_pattern.c_str(), 0, nullptr, &globbuf) == 0) { | if (glob(glob_pattern.c_str(), 0, nullptr, &globbuf) == 0) { | ||||
for (auto i = 0; i < globbuf.gl_pathc; i++) { | for (auto i = 0; i < globbuf.gl_pathc; i++) { | ||||
auto path = std::string{globbuf.gl_pathv[i]}; | |||||
auto path = sz::string{globbuf.gl_pathv[i]}; | |||||
std::size_t nsz; | std::size_t nsz; | ||||
struct stat st; | struct stat st; | ||||
struct hs_shared_database { | struct hs_shared_database { | ||||
hs_database_t *db = nullptr; /**< internal database (might be in a shared memory) */ | hs_database_t *db = nullptr; /**< internal database (might be in a shared memory) */ | ||||
std::optional<raii_mmaped_file> maybe_map; | std::optional<raii_mmaped_file> maybe_map; | ||||
std::string cached_path; | |||||
sz::string cached_path; | |||||
~hs_shared_database() | ~hs_shared_database() | ||||
{ | { | ||||
std::uint32_t crc32; | std::uint32_t crc32; | ||||
}; | }; | ||||
static auto | static auto | ||||
hs_is_valid_database(void *raw, std::size_t len, std::string_view fname) -> tl::expected<bool, std::string> | |||||
hs_is_valid_database(void *raw, std::size_t len, sz::string_view fname) -> tl::expected<bool, sz::string> | |||||
{ | { | ||||
if (len < sizeof(real_hs_db)) { | if (len < sizeof(real_hs_db)) { | ||||
return tl::make_unexpected(fmt::format("cannot load hyperscan database from {}: too short", fname)); | return tl::make_unexpected(fmt::format("cannot load hyperscan database from {}: too short", fname)); | ||||
else { | else { | ||||
auto &tmpfile_checked = tmpfile.value(); | auto &tmpfile_checked = tmpfile.value(); | ||||
// Store owned string | // Store owned string | ||||
auto tmpfile_name = std::string{tmpfile_checked.get_name()}; | |||||
auto tmpfile_name = sz::string{tmpfile_checked.get_name()}; | |||||
std::size_t unserialized_size; | std::size_t unserialized_size; | ||||
if (auto ret = hs_serialized_database_size(((const char *) cached_serialized.get_map()) + offset, | if (auto ret = hs_serialized_database_size(((const char *) cached_serialized.get_map()) + offset, |
#pragma once | #pragma once | ||||
#include "config.h" | #include "config.h" | ||||
#include <string> | |||||
#include <string_view> | |||||
#include "libutil/cxx/string.hxx" | |||||
#include <cstdint> | #include <cstdint> | ||||
#include <optional> | #include <optional> | ||||
* @param code | * @param code | ||||
* @param category | * @param category | ||||
*/ | */ | ||||
error(std::string &&msg, int code, error_category category = error_category::INFORMAL) | |||||
error(sz::string &&msg, int code, error_category category = error_category::INFORMAL) | |||||
: error_code(code), category(category) | : error_code(code), category(category) | ||||
{ | { | ||||
static_storage = std::move(msg); | static_storage = std::move(msg); | ||||
* @param code | * @param code | ||||
* @param category | * @param category | ||||
*/ | */ | ||||
error(const std::string &msg, int code, error_category category = error_category::INFORMAL) | |||||
error(const sz::string &msg, int code, error_category category = error_category::INFORMAL) | |||||
: error_code(code), category(category) | : error_code(code), category(category) | ||||
{ | { | ||||
static_storage = msg; | static_storage = msg; | ||||
} | } | ||||
public: | public: | ||||
std::string_view error_message; | |||||
sz::string_view error_message; | |||||
int error_code; | int error_code; | ||||
error_category category; | error_category category; | ||||
private: | private: | ||||
std::optional<std::string> static_storage; | |||||
std::optional<sz::string> static_storage; | |||||
}; | }; | ||||
}// namespace rspamd::util | }// namespace rspamd::util |
/* | /* | ||||
* Copyright 2023 Vsevolod Stakhov | |||||
* Copyright 2024 Vsevolod Stakhov | |||||
* | * | ||||
* Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
* you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
#include "config.h" | #include "config.h" | ||||
#include "contrib/expected/expected.hpp" | #include "contrib/expected/expected.hpp" | ||||
#include "libutil/cxx/error.hxx" | #include "libutil/cxx/error.hxx" | ||||
#include <string> | |||||
#include "libutil/cxx/string.hxx" | |||||
#include <sys/stat.h> | #include <sys/stat.h> | ||||
namespace rspamd::util { | namespace rspamd::util { | ||||
virtual ~raii_file() noexcept; | virtual ~raii_file() noexcept; | ||||
static auto open(const char *fname, int flags) -> tl::expected<raii_file, error>; | static auto open(const char *fname, int flags) -> tl::expected<raii_file, error>; | ||||
static auto open(const std::string &fname, int flags) -> tl::expected<raii_file, error> | |||||
static auto open(const sz::string &fname, int flags) -> tl::expected<raii_file, error> | |||||
{ | { | ||||
return open(fname.c_str(), flags); | return open(fname.c_str(), flags); | ||||
}; | }; | ||||
static auto create(const char *fname, int flags, int perms) -> tl::expected<raii_file, error>; | static auto create(const char *fname, int flags, int perms) -> tl::expected<raii_file, error>; | ||||
static auto create(const std::string &fname, int flags, int perms) -> tl::expected<raii_file, error> | |||||
static auto create(const sz::string &fname, int flags, int perms) -> tl::expected<raii_file, error> | |||||
{ | { | ||||
return create(fname.c_str(), flags, perms); | return create(fname.c_str(), flags, perms); | ||||
}; | }; |
/* | |||||
* Copyright 2024 Vsevolod Stakhov | |||||
* | |||||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||||
* you may not use this file except in compliance with the License. | |||||
* You may obtain a copy of the License at | |||||
* | |||||
* http://www.apache.org/licenses/LICENSE-2.0 | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, software | |||||
* distributed under the License is distributed on an "AS IS" BASIS, | |||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* See the License for the specific language governing permissions and | |||||
* limitations under the License. | |||||
*/ | |||||
// | |||||
// Created by Vsevolod Stakhov on 09/02/2024. | |||||
// | |||||
#ifndef RSPAMD_STRING_HXX | |||||
#define RSPAMD_STRING_HXX | |||||
#include <vector> | |||||
#include <cstdint>// Stringzilla does not do it for some reason and uses std::<int> types | |||||
#include <stringzilla/stringzilla.hpp> | |||||
#include <fmt/core.h> | |||||
#include <string_view> | |||||
#include <string> | |||||
namespace sz = ashvardanian::stringzilla; | |||||
using sz::literals::operator""_sz; | |||||
template<> | |||||
struct fmt::formatter<sz::string_view> : formatter<std::string_view> { | |||||
}; | |||||
template<> | |||||
struct fmt::formatter<sz::string> : formatter<std::string_view> { | |||||
}; | |||||
#endif//RSPAMD_STRING_HXX |