Vsevolod Stakhov 4 weken geleden
bovenliggende
commit
cc49fb61ff
No account linked to committer's email address

+ 8
- 0
CMakeLists.txt Bestand weergeven

@@ -126,6 +126,7 @@ INCLUDE_DIRECTORIES("${CMAKE_SOURCE_DIR}/"
"${CMAKE_SOURCE_DIR}/contrib/lua-lpeg"
"${CMAKE_SOURCE_DIR}/contrib/frozen/include"
"${CMAKE_SOURCE_DIR}/contrib/fu2/include"
"${CMAKE_SOURCE_DIR}/contrib/stringzilla/include"
"${CMAKE_BINARY_DIR}/src" #Stored in the binary dir
"${CMAKE_BINARY_DIR}/src/libcryptobox")

@@ -666,6 +667,13 @@ IF (ENABLE_LUA_REPL MATCHES "ON")
LIST(APPEND RSPAMD_REQUIRED_LIBRARIES rspamd-replxx)
ENDIF ()

# Vendored StringZilla: configure its subdirectory and link the resulting library.
ADD_SUBDIRECTORY(contrib/stringzilla)
LIST(APPEND RSPAMD_REQUIRED_LIBRARIES rspamd-stringzilla)
# Apply every entry of SZ_DEFINITIONS to all targets, as we use those in the includes
ADD_DEFINITIONS(${SZ_DEFINITIONS})

IF (ENABLE_SNOWBALL MATCHES "ON")
LIST(APPEND RSPAMD_REQUIRED_LIBRARIES stemmer)
ENDIF ()

+ 1
- 0
cmake/ArchDep.cmake Bestand weergeven

@@ -31,6 +31,7 @@ IF("${ARCH}" STREQUAL "x86_64" OR "${ARCH}" STREQUAL "i386")
#include <stddef.h>
#pragma GCC push_options
#pragma GCC target(\"avx2\")
#pragma clang attribute push(__attribute__((target(\"avx2\"))))
#ifndef __SSE2__
#define __SSE2__
#endif

+ 7
- 0
cmake/CompilerWarnings.cmake Bestand weergeven

@@ -23,6 +23,9 @@ CHECK_C_COMPILER_FLAG(-Wdeprecated-declarations SUPPORT_WDEPRECATED_DECLARATIONS
# Disable -Wsuggest-attribute=format: it is too noisy with FPs around fmt C++ library
CHECK_C_COMPILER_FLAG(-Wsuggest-attribute SUPPORT_WSUGGEST_ATTRIBUTE)

# Disable -Wunknown-pragmas: we have both clang and gcc pragmas
CHECK_C_COMPILER_FLAG(-Wunknown-pragmas SUPPORT_WUNKNOWN_PRAGMAS)

IF(SUPPORT_WEXTRA)
ADD_COMPILE_OPTIONS("-Wextra")
ENDIF(SUPPORT_WEXTRA)
@@ -87,3 +90,7 @@ ENDIF()
IF(SUPPORT_WDEPRECATED_DECLARATIONS)
ADD_COMPILE_OPTIONS("-Wno-deprecated-declarations")
ENDIF()

IF(SUPPORT_WUNKNOWN_PRAGMAS)
ADD_COMPILE_OPTIONS("-Wno-unknown-pragmas")
ENDIF()

+ 3
- 1
cmake/FindArch.cmake Bestand weergeven

@@ -1,6 +1,8 @@
set(archdetect_c_code "
#if defined(__arm__) || defined(__TARGET_ARCH_ARM)
#if defined(__ARM_ARCH_7__) \\
#if defined(__aarch64__) || defined(__ARM64__) || defined(_M_ARM64)
#error cmake_ARCH arm64
#elif defined(__ARM_ARCH_7__) \\
|| defined(__ARM_ARCH_7A__) \\
|| defined(__ARM_ARCH_7R__) \\
|| defined(__ARM_ARCH_7M__) \\

+ 1
- 0
contrib/DEPENDENCY_INFO.md Bestand weergeven

@@ -38,4 +38,5 @@
| ankerl/svector | 1.0.2 | MIT | NO | |
| ankerl/unordered_dense | 4.4.0 | MIT | NO | |
| backward-cpp | 1.6 | MIT | NO | |
| stringzilla | 3.5.0 | Apache2 | NO | |


+ 24
- 0
contrib/stringzilla/CMakeLists.txt Bestand weergeven

@@ -0,0 +1,24 @@
# Builds the vendored StringZilla dispatch library (lib.c) as a static library
# and exports the compile definitions it was built with as SZ_DEFINITIONS
# to the parent scope.
SET(STRINGZILLASRC lib.c)

# Accumulate the definitions in the *current* scope first.
# SET(... PARENT_SCOPE) leaves the local variable untouched, so exporting up
# front would make the LIST(APPEND ...) calls below build a separate local list
# that the parent never sees (and that lacks SZ_DYNAMIC_DISPATCH).
SET(SZ_DEFINITIONS "-DSZ_DYNAMIC_DISPATCH=1")

# ARCH is filled in by TARGET_ARCHITECTURE(); only one branch can match.
TARGET_ARCHITECTURE(ARCH)
IF ("${ARCH}" STREQUAL "x86_64")
    LIST(APPEND SZ_DEFINITIONS
            "-DSZ_USE_X86_AVX512=1"
            "-DSZ_USE_X86_AVX2=1"
            "-DSZ_USE_MISALIGNED_LOADS=1")
ELSEIF ("${ARCH}" STREQUAL "arm64")
    LIST(APPEND SZ_DEFINITIONS
            "-DSZ_USE_ARM_NEON=1"
            "-DSZ_USE_ARM_SVE=1"
            "-DSZ_USE_MISALIGNED_LOADS=1")
ENDIF ()

# Definitions for the targets of this directory (the library below).
ADD_DEFINITIONS(${SZ_DEFINITIONS})

ADD_LIBRARY(rspamd-stringzilla STATIC ${STRINGZILLASRC})
SET_TARGET_PROPERTIES(rspamd-stringzilla PROPERTIES VERSION ${RSPAMD_VERSION})

# Export the complete list so the parent can apply the same definitions.
SET(SZ_DEFINITIONS "${SZ_DEFINITIONS}" PARENT_SCOPE)

+ 5370
- 0
contrib/stringzilla/include/stringzilla/stringzilla.h
Diff onderdrukt omdat het te groot bestand
Bestand weergeven


+ 3838
- 0
contrib/stringzilla/include/stringzilla/stringzilla.hpp
Diff onderdrukt omdat het te groot bestand
Bestand weergeven


+ 337
- 0
contrib/stringzilla/lib.c Bestand weergeven

@@ -0,0 +1,337 @@
/**
* @file lib.c
* @brief StringZilla C library with dynamic backend dispatch for the most appropriate implementation.
* @author Ash Vardanian
* @date January 16, 2024
* @copyright Copyright (c) 2024
*/
#if defined(_WIN32) || defined(__CYGWIN__)
#include <windows.h> // `DllMain`
#endif

// Overwrite `SZ_DYNAMIC_DISPATCH` before including StringZilla.
#ifdef SZ_DYNAMIC_DISPATCH
#undef SZ_DYNAMIC_DISPATCH
#endif
#define SZ_DYNAMIC_DISPATCH 1
#include <stringzilla/stringzilla.h>

#if SZ_AVOID_LIBC
// Without LibC the `malloc` definition in `stringzilla.h` would be ill-formed,
// so declare `size_t` here and provide do-nothing stand-ins for the LibC symbols.
#if defined(_MSC_VER)
typedef sz_size_t size_t; // Reuse the type definition inferred from `stringzilla.h`
#else
typedef __SIZE_TYPE__ size_t; // Compiler-provided type on GCC/Clang
#endif
// Stand-in allocator: ignores the request and always returns `SZ_NULL`.
void *malloc(size_t length) {
    sz_unused(length);
    return SZ_NULL;
}
// Stand-in deallocator: does nothing.
void free(void *start) { sz_unused(start); }
// Stand-in random source: always returns zero.
int rand(void) { return 0; }
#endif

/**
 *  @brief  Reports which backends may be used on the current CPU.
 *
 *  When compiled with `SZ_USE_X86_AVX512` or `SZ_USE_X86_AVX2`, the feature bits are read
 *  from CPUID leaf 7, sub-leaf 0. When compiled with `SZ_USE_ARM_NEON` or `SZ_USE_ARM_SVE`,
 *  NEON is always reported and SVE/SVE2 are never reported. Otherwise only the serial
 *  backend is reported.
 *
 *  @return Bitwise OR of `sz_cap_*_k` flags; `sz_cap_serial_k` is always part of the result.
 */
SZ_DYNAMIC sz_capability_t sz_capabilities(void) {

#if SZ_USE_X86_AVX512 || SZ_USE_X86_AVX2

    /// The states of 4 registers populated for a specific "cpuid" assembly call
    union four_registers_t {
        int array[4];
        struct separate_t {
            unsigned eax, ebx, ecx, edx;
        } named;
    } info7;

    // Every feature checked below lives in leaf 7, sub-leaf 0, so that is the only query needed.
#ifdef _MSC_VER
    __cpuidex(info7.array, 7, 0);
#else
    __asm__ __volatile__("cpuid"
                         : "=a"(info7.named.eax), "=b"(info7.named.ebx), "=c"(info7.named.ecx), "=d"(info7.named.edx)
                         : "a"(7), "c"(0));
#endif

    // Check for AVX2 (Function ID 7, EBX register)
    // https://github.com/llvm/llvm-project/blob/50598f0ff44f3a4e75706f8c53f3380fe7faa896/clang/lib/Headers/cpuid.h#L148
    unsigned supports_avx2 = (info7.named.ebx & 0x00000020) != 0;
    // Check for AVX512F (Function ID 7, EBX register)
    // https://github.com/llvm/llvm-project/blob/50598f0ff44f3a4e75706f8c53f3380fe7faa896/clang/lib/Headers/cpuid.h#L155
    unsigned supports_avx512f = (info7.named.ebx & 0x00010000) != 0;
    // Check for AVX512BW (Function ID 7, EBX register)
    // https://github.com/llvm/llvm-project/blob/50598f0ff44f3a4e75706f8c53f3380fe7faa896/clang/lib/Headers/cpuid.h#L166
    unsigned supports_avx512bw = (info7.named.ebx & 0x40000000) != 0;
    // Check for AVX512VL (Function ID 7, EBX register)
    // https://github.com/llvm/llvm-project/blob/50598f0ff44f3a4e75706f8c53f3380fe7faa896/clang/lib/Headers/cpuid.h#L167C25-L167C35
    unsigned supports_avx512vl = (info7.named.ebx & 0x80000000) != 0;
    // Check for AVX512VBMI (Function ID 7, ECX register, bit 1).
    // The same bit of leaf 1 ECX is PCLMULQDQ, so leaf 1 must not be consulted here.
    // https://github.com/llvm/llvm-project/blob/50598f0ff44f3a4e75706f8c53f3380fe7faa896/clang/lib/Headers/cpuid.h#L171C30-L171C40
    unsigned supports_avx512vbmi = (info7.named.ecx & 0x00000002) != 0;
    // Check for GFNI (Function ID 7, ECX register, bit 8).
    // The same bit of leaf 1 ECX is TM2, so leaf 1 must not be consulted here.
    // https://github.com/llvm/llvm-project/blob/50598f0ff44f3a4e75706f8c53f3380fe7faa896/clang/lib/Headers/cpuid.h#L177C30-L177C40
    unsigned supports_gfni = (info7.named.ecx & 0x00000100) != 0;

    return (sz_capability_t)(                           //
        (sz_cap_x86_avx2_k * supports_avx2) |           //
        (sz_cap_x86_avx512f_k * supports_avx512f) |     //
        (sz_cap_x86_avx512vl_k * supports_avx512vl) |   //
        (sz_cap_x86_avx512bw_k * supports_avx512bw) |   //
        (sz_cap_x86_avx512vbmi_k * supports_avx512vbmi) | //
        (sz_cap_x86_gfni_k * (supports_gfni)) |         //
        (sz_cap_serial_k));

#endif // SZ_USE_X86_AVX512 || SZ_USE_X86_AVX2

#if SZ_USE_ARM_NEON || SZ_USE_ARM_SVE

    // Every 64-bit Arm CPU supports NEON
    unsigned supports_neon = 1;
    // SVE and SVE2 are not detected here, so they are never reported.
    unsigned supports_sve = 0;
    unsigned supports_sve2 = 0;
    sz_unused(supports_sve);
    sz_unused(supports_sve2);

    return (sz_capability_t)(                 //
        (sz_cap_arm_neon_k * supports_neon) | //
        (sz_cap_serial_k));

#endif // SZ_USE_ARM_NEON || SZ_USE_ARM_SVE

    // No SIMD backend compiled in: only the serial implementation is available.
    return sz_cap_serial_k;
}

/**
 * @brief Table of function pointers, one slot per dynamically dispatched operation.
 * The slots are filled by sz_dispatch_table_init() and called by the `SZ_DYNAMIC` wrappers in this file.
 */
typedef struct sz_implementations_t {
// Comparison: called by sz_equal() and sz_order().
sz_equal_t equal;
sz_order_t order;

// Memory operations: called by sz_copy(), sz_move() and sz_fill().
sz_move_t copy;
sz_move_t move;
sz_fill_t fill;

// Search: called by sz_find_byte(), sz_rfind_byte(), sz_find(), sz_rfind(),
// sz_find_charset() and sz_rfind_charset() respectively.
sz_find_byte_t find_byte;
sz_find_byte_t rfind_byte;
sz_find_t find;
sz_find_t rfind;
sz_find_set_t find_from_set;
sz_find_set_t rfind_from_set;

// Called by sz_edit_distance(), sz_alignment_score() and sz_hashes().
sz_edit_distance_t edit_distance;
sz_alignment_score_t alignment_score;
sz_hashes_t hashes;

} sz_implementations_t;
// The single file-local instance of the table.
static sz_implementations_t sz_dispatch_table;

/**
 *  @brief  Populates the global static dispatch table with one implementation per operation.
 *
 *  Every slot first receives the serial implementation. Each SIMD block that is compiled in
 *  then overwrites the slots it covers when sz_capabilities() reports the matching flag, so a
 *  later block (AVX-512) takes precedence over an earlier one (AVX2).
 *  Run it just once to avoid unnecessary `if`-s on every call.
 */
static void sz_dispatch_table_init(void) {
    sz_capability_t caps = sz_capabilities();
    sz_implementations_t *table = &sz_dispatch_table;
    sz_unused(caps); //< Unused when compiling on pre-SIMD machines.

    // Serial baseline for every slot.
    table->equal = sz_equal_serial;
    table->order = sz_order_serial;

    table->copy = sz_copy_serial;
    table->move = sz_move_serial;
    table->fill = sz_fill_serial;

    table->find_byte = sz_find_byte_serial;
    table->rfind_byte = sz_rfind_byte_serial;
    table->find = sz_find_serial;
    table->rfind = sz_rfind_serial;
    table->find_from_set = sz_find_charset_serial;
    table->rfind_from_set = sz_rfind_charset_serial;

    table->edit_distance = sz_edit_distance_serial;
    table->alignment_score = sz_alignment_score_serial;
    table->hashes = sz_hashes_serial;

#if SZ_USE_X86_AVX2
    if (caps & sz_cap_x86_avx2_k) {
        table->copy = sz_copy_avx2;
        table->move = sz_move_avx2;
        table->fill = sz_fill_avx2;

        table->find_byte = sz_find_byte_avx2;
        table->rfind_byte = sz_rfind_byte_avx2;
        table->find = sz_find_avx2;
        table->rfind = sz_rfind_avx2;
    }
#endif

#if SZ_USE_X86_AVX512
    // Gated on AVX512F alone.
    // NOTE(review): confirm these kernels need no further AVX-512 subsets.
    if (caps & sz_cap_x86_avx512f_k) {
        table->equal = sz_equal_avx512;
        table->order = sz_order_avx512;

        table->copy = sz_copy_avx512;
        table->move = sz_move_avx512;
        table->fill = sz_fill_avx512;

        table->find_byte = sz_find_byte_avx512;
        table->rfind_byte = sz_rfind_byte_avx512;
        table->find = sz_find_avx512;
        table->rfind = sz_rfind_avx512;

        table->edit_distance = sz_edit_distance_avx512;
    }

    // These three are selected only when all five capability flags are reported.
    if ((caps & sz_cap_x86_avx512f_k) && (caps & sz_cap_x86_avx512vl_k) && (caps & sz_cap_x86_avx512bw_k) &&
        (caps & sz_cap_x86_avx512vbmi_k) && (caps & sz_cap_x86_gfni_k)) {
        table->alignment_score = sz_alignment_score_avx512;
        table->find_from_set = sz_find_charset_avx512;
        table->rfind_from_set = sz_rfind_charset_avx512;
    }
#endif

#if SZ_USE_ARM_NEON
    if (caps & sz_cap_arm_neon_k) {
        table->find_byte = sz_find_byte_neon;
        table->rfind_byte = sz_rfind_byte_neon;
        table->find = sz_find_neon;
        table->rfind = sz_rfind_neon;
        table->find_from_set = sz_find_charset_neon;
        table->rfind_from_set = sz_rfind_charset_neon;
    }
#endif
}

#if defined(_MSC_VER)
/**
 *  @brief  MSVC entry point: fills the dispatch table on `DLL_PROCESS_ATTACH`.
 *
 *  Always returns `TRUE`, including for any `fdwReason` value that is not handled
 *  explicitly, so control never reaches the end of a non-void function.
 */
BOOL WINAPI DllMain(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpReserved) {
    sz_unused(hinstDLL);
    sz_unused(lpReserved);
    switch (fdwReason) {
    case DLL_PROCESS_ATTACH: sz_dispatch_table_init(); return TRUE;
    case DLL_THREAD_ATTACH: return TRUE;
    case DLL_THREAD_DETACH: return TRUE;
    case DLL_PROCESS_DETACH: return TRUE;
    default: return TRUE;
    }
}
#else
// With other compilers the table is filled by a function marked with the `constructor` attribute.
__attribute__((constructor)) static void sz_dispatch_table_init_on_gcc_or_clang(void) { sz_dispatch_table_init(); }
#endif

/** @brief Forwards to the `equal` slot of the dispatch table. */
SZ_DYNAMIC sz_bool_t sz_equal(sz_cptr_t a, sz_cptr_t b, sz_size_t length) {
    sz_equal_t const backend = sz_dispatch_table.equal;
    return backend(a, b, length);
}

/** @brief Forwards to the `order` slot of the dispatch table. */
SZ_DYNAMIC sz_ordering_t sz_order(sz_cptr_t a, sz_size_t a_length, sz_cptr_t b, sz_size_t b_length) {
    sz_order_t const backend = sz_dispatch_table.order;
    return backend(a, a_length, b, b_length);
}

/** @brief Forwards to the `copy` slot of the dispatch table. */
SZ_DYNAMIC void sz_copy(sz_ptr_t target, sz_cptr_t source, sz_size_t length) {
    sz_move_t const backend = sz_dispatch_table.copy;
    backend(target, source, length);
}

/** @brief Forwards to the `move` slot of the dispatch table. */
SZ_DYNAMIC void sz_move(sz_ptr_t target, sz_cptr_t source, sz_size_t length) {
    sz_move_t const backend = sz_dispatch_table.move;
    backend(target, source, length);
}

/** @brief Forwards to the `fill` slot of the dispatch table. */
SZ_DYNAMIC void sz_fill(sz_ptr_t target, sz_size_t length, sz_u8_t value) {
    sz_fill_t const backend = sz_dispatch_table.fill;
    backend(target, length, value);
}

/** @brief Forwards to the `find_byte` slot of the dispatch table. */
SZ_DYNAMIC sz_cptr_t sz_find_byte(sz_cptr_t haystack, sz_size_t h_length, sz_cptr_t needle) {
    sz_find_byte_t const backend = sz_dispatch_table.find_byte;
    return backend(haystack, h_length, needle);
}

/** @brief Forwards to the `rfind_byte` slot of the dispatch table. */
SZ_DYNAMIC sz_cptr_t sz_rfind_byte(sz_cptr_t haystack, sz_size_t h_length, sz_cptr_t needle) {
    sz_find_byte_t const backend = sz_dispatch_table.rfind_byte;
    return backend(haystack, h_length, needle);
}

/** @brief Forwards to the `find` slot of the dispatch table. */
SZ_DYNAMIC sz_cptr_t sz_find(sz_cptr_t haystack, sz_size_t h_length, sz_cptr_t needle, sz_size_t n_length) {
    sz_find_t const backend = sz_dispatch_table.find;
    return backend(haystack, h_length, needle, n_length);
}

/** @brief Forwards to the `rfind` slot of the dispatch table. */
SZ_DYNAMIC sz_cptr_t sz_rfind(sz_cptr_t haystack, sz_size_t h_length, sz_cptr_t needle, sz_size_t n_length) {
    sz_find_t const backend = sz_dispatch_table.rfind;
    return backend(haystack, h_length, needle, n_length);
}

/** @brief Forwards to the `find_from_set` slot of the dispatch table. */
SZ_DYNAMIC sz_cptr_t sz_find_charset(sz_cptr_t text, sz_size_t length, sz_charset_t const *set) {
    sz_find_set_t const backend = sz_dispatch_table.find_from_set;
    return backend(text, length, set);
}

/** @brief Forwards to the `rfind_from_set` slot of the dispatch table. */
SZ_DYNAMIC sz_cptr_t sz_rfind_charset(sz_cptr_t text, sz_size_t length, sz_charset_t const *set) {
    sz_find_set_t const backend = sz_dispatch_table.rfind_from_set;
    return backend(text, length, set);
}

/** @brief Not dispatched: always calls sz_hamming_distance_serial() with the same arguments. */
SZ_DYNAMIC sz_size_t sz_hamming_distance(sz_cptr_t a, sz_size_t a_length, sz_cptr_t b, sz_size_t b_length,
                                         sz_size_t bound) {
    sz_size_t const distance = sz_hamming_distance_serial(a, a_length, b, b_length, bound);
    return distance;
}

/** @brief Not dispatched: always calls sz_hamming_distance_utf8_serial() with the same arguments. */
SZ_DYNAMIC sz_size_t sz_hamming_distance_utf8(sz_cptr_t a, sz_size_t a_length, sz_cptr_t b, sz_size_t b_length,
                                              sz_size_t bound) {
    sz_size_t const distance = sz_hamming_distance_utf8_serial(a, a_length, b, b_length, bound);
    return distance;
}

/** @brief Forwards to the `edit_distance` slot of the dispatch table. */
SZ_DYNAMIC sz_size_t sz_edit_distance(sz_cptr_t a, sz_size_t a_length, sz_cptr_t b, sz_size_t b_length,
                                      sz_size_t bound, sz_memory_allocator_t *alloc) {
    sz_edit_distance_t const backend = sz_dispatch_table.edit_distance;
    return backend(a, a_length, b, b_length, bound, alloc);
}

/**
 *  @brief Not dispatched: always calls _sz_edit_distance_wagner_fisher_serial(),
 *         passing `sz_true_k` as its sixth argument.
 */
SZ_DYNAMIC sz_size_t sz_edit_distance_utf8(sz_cptr_t a, sz_size_t a_length, sz_cptr_t b, sz_size_t b_length,
                                           sz_size_t bound, sz_memory_allocator_t *alloc) {
    sz_size_t const distance =
        _sz_edit_distance_wagner_fisher_serial(a, a_length, b, b_length, bound, sz_true_k, alloc);
    return distance;
}

/** @brief Forwards to the `alignment_score` slot of the dispatch table. */
SZ_DYNAMIC sz_ssize_t sz_alignment_score(sz_cptr_t a, sz_size_t a_length, sz_cptr_t b, sz_size_t b_length,
                                         sz_error_cost_t const *subs, sz_error_cost_t gap,
                                         sz_memory_allocator_t *alloc) {
    sz_alignment_score_t const backend = sz_dispatch_table.alignment_score;
    return backend(a, a_length, b, b_length, subs, gap, alloc);
}

/** @brief Forwards to the `hashes` slot of the dispatch table. */
SZ_DYNAMIC void sz_hashes(sz_cptr_t text, sz_size_t length, sz_size_t window_length, sz_size_t step,
                          sz_hash_callback_t callback, void *callback_handle) {
    sz_hashes_t const backend = sz_dispatch_table.hashes;
    backend(text, length, window_length, step, callback, callback_handle);
}

/**
 *  @brief Builds a character set from the `n_length` characters at `n`
 *         and passes it to sz_find_charset() together with `h` and `h_length`.
 */
SZ_DYNAMIC sz_cptr_t sz_find_char_from(sz_cptr_t h, sz_size_t h_length, sz_cptr_t n, sz_size_t n_length) {
    sz_charset_t accepted;
    sz_size_t i;
    sz_charset_init(&accepted);
    for (i = 0; i != n_length; ++i) sz_charset_add(&accepted, n[i]);
    return sz_find_charset(h, h_length, &accepted);
}

/**
 *  @brief Builds a character set from the `n_length` characters at `n`, inverts it,
 *         and passes it to sz_find_charset() together with `h` and `h_length`.
 */
SZ_DYNAMIC sz_cptr_t sz_find_char_not_from(sz_cptr_t h, sz_size_t h_length, sz_cptr_t n, sz_size_t n_length) {
    sz_charset_t rejected;
    sz_size_t i;
    sz_charset_init(&rejected);
    for (i = 0; i != n_length; ++i) sz_charset_add(&rejected, n[i]);
    sz_charset_invert(&rejected);
    return sz_find_charset(h, h_length, &rejected);
}

/**
 *  @brief Builds a character set from the `n_length` characters at `n`
 *         and passes it to sz_rfind_charset() together with `h` and `h_length`.
 */
SZ_DYNAMIC sz_cptr_t sz_rfind_char_from(sz_cptr_t h, sz_size_t h_length, sz_cptr_t n, sz_size_t n_length) {
    sz_charset_t accepted;
    sz_size_t i;
    sz_charset_init(&accepted);
    for (i = 0; i != n_length; ++i) sz_charset_add(&accepted, n[i]);
    return sz_rfind_charset(h, h_length, &accepted);
}

/**
 *  @brief Builds a character set from the `n_length` characters at `n`, inverts it,
 *         and passes it to sz_rfind_charset() together with `h` and `h_length`.
 */
SZ_DYNAMIC sz_cptr_t sz_rfind_char_not_from(sz_cptr_t h, sz_size_t h_length, sz_cptr_t n, sz_size_t n_length) {
    sz_charset_t rejected;
    sz_size_t i;
    sz_charset_init(&rejected);
    for (i = 0; i != n_length; ++i) sz_charset_add(&rejected, n[i]);
    sz_charset_invert(&rejected);
    return sz_rfind_charset(h, h_length, &rejected);
}

sz_u64_t _sz_random_generator(void *empty_state) {
sz_unused(empty_state);
return (sz_u64_t)rand();
}

/**
 *  @brief Calls sz_generate_serial() with the given arguments, substituting
 *         `_sz_random_generator` when `generator` is null.
 */
SZ_DYNAMIC void sz_generate(sz_cptr_t alphabet, sz_size_t alphabet_size, sz_ptr_t result, sz_size_t result_length,
                            sz_random_generator_t generator, void *generator_user_data) {
    sz_random_generator_t const chosen = generator ? generator : _sz_random_generator;
    sz_generate_serial(alphabet, alphabet_size, result, result_length, chosen, generator_user_data);
}

+ 18
- 18
src/libserver/hyperscan_tools.cxx Bestand weergeven

@@ -1,5 +1,5 @@
/*
* Copyright 2023 Vsevolod Stakhov
* Copyright 2024 Vsevolod Stakhov
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -17,11 +17,11 @@
#include "config.h"

#ifdef WITH_HYPERSCAN
#include <string>
#include <filesystem>
#include "contrib/ankerl/unordered_dense.h"
#include "contrib/ankerl/svector.h"
#include "fmt/core.h"
#include "libutil/cxx/string.hxx"
#include "libutil/cxx/file_util.hxx"
#include "libutil/cxx/error.hxx"
#include "hs.h"
@@ -91,9 +91,9 @@ namespace rspamd::util {
class hs_known_files_cache {
private:
// These fields are filled when we add new known cache files
ankerl::svector<std::string, 4> cache_dirs;
ankerl::svector<std::string, 8> cache_extensions;
ankerl::unordered_dense::set<std::string> known_cached_files;
ankerl::svector<sz::string, 4> cache_dirs;
ankerl::svector<sz::string, 8> cache_extensions;
ankerl::unordered_dense::set<sz::string> known_cached_files;
bool loaded = false;

private:
@@ -132,16 +132,16 @@ public:
return;
}

auto dir = fpath.parent_path();
auto ext = fpath.extension();
auto dir = sz::string{fpath.parent_path().c_str()};
auto ext = sz::string{fpath.extension().c_str()};

if (std::find_if(cache_dirs.begin(), cache_dirs.end(),
[&](const auto &item) { return item == dir; }) == std::end(cache_dirs)) {
cache_dirs.emplace_back(std::string{dir});
cache_dirs.emplace_back(sz::string{dir});
}
if (std::find_if(cache_extensions.begin(), cache_extensions.end(),
[&](const auto &item) { return item == ext; }) == std::end(cache_extensions)) {
cache_extensions.emplace_back(std::string{ext});
cache_extensions.emplace_back(sz::string{ext});
}

auto is_known = known_cached_files.insert(fpath.string());
@@ -162,16 +162,16 @@ public:
return;
}

auto dir = fpath.parent_path();
auto ext = fpath.extension();
auto dir = sz::string{fpath.parent_path().c_str()};
auto ext = sz::string{fpath.extension().c_str()};

if (std::find_if(cache_dirs.begin(), cache_dirs.end(),
[&](const auto &item) { return item == dir; }) == std::end(cache_dirs)) {
cache_dirs.emplace_back(dir.string());
cache_dirs.emplace_back(dir);
}
if (std::find_if(cache_extensions.begin(), cache_extensions.end(),
[&](const auto &item) { return item == ext; }) == std::end(cache_extensions)) {
cache_extensions.emplace_back(ext.string());
cache_extensions.emplace_back(ext);
}

auto is_known = known_cached_files.insert(fpath.string());
@@ -215,7 +215,7 @@ public:
/* We clean dir merely if we are running from the main process */
if (rspamd_current_worker == nullptr && env_cleanup_disable == nullptr && loaded) {
const auto *log_func = RSPAMD_LOG_FUNC;
auto cleanup_dir = [&](std::string_view dir) -> void {
auto cleanup_dir = [&](sz::string_view dir) -> void {
for (const auto &ext: cache_extensions) {
glob_t globbuf;

@@ -227,7 +227,7 @@ public:

if (glob(glob_pattern.c_str(), 0, nullptr, &globbuf) == 0) {
for (auto i = 0; i < globbuf.gl_pathc; i++) {
auto path = std::string{globbuf.gl_pathv[i]};
auto path = sz::string{globbuf.gl_pathv[i]};
std::size_t nsz;
struct stat st;

@@ -287,7 +287,7 @@ public:
struct hs_shared_database {
hs_database_t *db = nullptr; /**< internal database (might be in a shared memory) */
std::optional<raii_mmaped_file> maybe_map;
std::string cached_path;
sz::string cached_path;

~hs_shared_database()
{
@@ -335,7 +335,7 @@ struct real_hs_db {
std::uint32_t crc32;
};
static auto
hs_is_valid_database(void *raw, std::size_t len, std::string_view fname) -> tl::expected<bool, std::string>
hs_is_valid_database(void *raw, std::size_t len, sz::string_view fname) -> tl::expected<bool, sz::string>
{
if (len < sizeof(real_hs_db)) {
return tl::make_unexpected(fmt::format("cannot load hyperscan database from {}: too short", fname));
@@ -416,7 +416,7 @@ auto load_cached_hs_file(const char *fname, std::int64_t offset = 0) -> tl::expe
else {
auto &tmpfile_checked = tmpfile.value();
// Store owned string
auto tmpfile_name = std::string{tmpfile_checked.get_name()};
auto tmpfile_name = sz::string{tmpfile_checked.get_name()};
std::size_t unserialized_size;

if (auto ret = hs_serialized_database_size(((const char *) cached_serialized.get_map()) + offset,

+ 5
- 6
src/libutil/cxx/error.hxx Bestand weergeven

@@ -19,8 +19,7 @@
#pragma once

#include "config.h"
#include <string>
#include <string_view>
#include "libutil/cxx/string.hxx"
#include <cstdint>
#include <optional>

@@ -54,7 +53,7 @@ public:
* @param code
* @param category
*/
error(std::string &&msg, int code, error_category category = error_category::INFORMAL)
error(sz::string &&msg, int code, error_category category = error_category::INFORMAL)
: error_code(code), category(category)
{
static_storage = std::move(msg);
@@ -66,7 +65,7 @@ public:
* @param code
* @param category
*/
error(const std::string &msg, int code, error_category category = error_category::INFORMAL)
error(const sz::string &msg, int code, error_category category = error_category::INFORMAL)
: error_code(code), category(category)
{
static_storage = msg;
@@ -148,12 +147,12 @@ public:
}

public:
std::string_view error_message;
sz::string_view error_message;
int error_code;
error_category category;

private:
std::optional<std::string> static_storage;
std::optional<sz::string> static_storage;
};

}// namespace rspamd::util

+ 4
- 4
src/libutil/cxx/file_util.hxx Bestand weergeven

@@ -1,5 +1,5 @@
/*
* Copyright 2023 Vsevolod Stakhov
* Copyright 2024 Vsevolod Stakhov
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -20,7 +20,7 @@
#include "config.h"
#include "contrib/expected/expected.hpp"
#include "libutil/cxx/error.hxx"
#include <string>
#include "libutil/cxx/string.hxx"
#include <sys/stat.h>

namespace rspamd::util {
@@ -33,12 +33,12 @@ public:
virtual ~raii_file() noexcept;

static auto open(const char *fname, int flags) -> tl::expected<raii_file, error>;
static auto open(const std::string &fname, int flags) -> tl::expected<raii_file, error>
static auto open(const sz::string &fname, int flags) -> tl::expected<raii_file, error>
{
return open(fname.c_str(), flags);
};
static auto create(const char *fname, int flags, int perms) -> tl::expected<raii_file, error>;
static auto create(const std::string &fname, int flags, int perms) -> tl::expected<raii_file, error>
static auto create(const sz::string &fname, int flags, int perms) -> tl::expected<raii_file, error>
{
return create(fname.c_str(), flags, perms);
};

+ 42
- 0
src/libutil/cxx/string.hxx Bestand weergeven

@@ -0,0 +1,42 @@
/*
* Copyright 2024 Vsevolod Stakhov
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

//
// Created by Vsevolod Stakhov on 09/02/2024.
//

#ifndef RSPAMD_STRING_HXX
#define RSPAMD_STRING_HXX

#include <vector>
#include <cstdint>// Stringzilla does not do it for some reason and uses std::<int> types
#include <stringzilla/stringzilla.hpp>
#include <fmt/core.h>
#include <string_view>
#include <string>

namespace sz = ashvardanian::stringzilla;
using sz::literals::operator""_sz;

// Format `sz::string_view` with the formatter that fmt provides for `std::string_view`.
template<>
struct fmt::formatter<sz::string_view> : fmt::formatter<std::string_view> {};

// Format `sz::string` with the formatter that fmt provides for `std::string_view`.
template<>
struct fmt::formatter<sz::string> : fmt::formatter<std::string_view> {};

#endif//RSPAMD_STRING_HXX

Laden…
Annuleren
Opslaan