source.dussan.org Git - rspamd.git/commitdiff
[Project] Attach stringzilla library
author Vsevolod Stakhov <vsevolod@rspamd.com>
Fri, 9 Feb 2024 13:53:08 +0000 (13:53 +0000)
committer Vsevolod Stakhov <vsevolod@rspamd.com>
Fri, 9 Feb 2024 13:53:08 +0000 (13:53 +0000)
cmake/ArchDep.cmake
cmake/FindArch.cmake
contrib/stringzilla/CMakeLists.txt
src/libserver/hyperscan_tools.cxx
src/libutil/cxx/error.hxx
src/libutil/cxx/file_util.hxx

index 8271709b6397e16d160ab49fe9fe88c501c3a9cb..0dc46591e5eeeaf2a22a711a723f30ee26343b76 100644 (file)
@@ -31,6 +31,7 @@ IF("${ARCH}" STREQUAL "x86_64" OR "${ARCH}" STREQUAL "i386")
 #include <stddef.h>
 #pragma GCC push_options
 #pragma GCC target(\"avx2\")
+#pragma clang attribute push(__attribute__((target(\"avx2\"))))
 #ifndef __SSE2__
 #define __SSE2__
 #endif
index e172207371d3a262860db4a75550711010b87417..ab1aab077bc6533ff050de6eb141f5cd8cef372f 100644 (file)
@@ -1,6 +1,8 @@
 set(archdetect_c_code "
 #if defined(__arm__) || defined(__TARGET_ARCH_ARM)
-    #if defined(__ARM_ARCH_7__) \\
+    #if defined(__aarch64__) || defined(__ARM64__) || defined(_M_ARM64)
+        #error cmake_ARCH arm64
+    #elif defined(__ARM_ARCH_7__) \\
         || defined(__ARM_ARCH_7A__) \\
         || defined(__ARM_ARCH_7R__) \\
         || defined(__ARM_ARCH_7M__) \\
index 779b6faf5da71c2f0774c6ab8d0112fa4796ae31..71616093c256de76b4e70d685ccf0faee86c617d 100644 (file)
@@ -2,13 +2,20 @@ SET(STRINGZILLASRC            lib.c)
 
 SET(SZ_DEFINITIONS
         "-DSZ_DYNAMIC_DISPATCH=1"
-        "-DSZ_USE_MISALIGNED_LOADS=1"
-        "-DSZ_USE_X86_AVX512=1"
-        "-DSZ_USE_X86_AVX2=1"
-        "-DSZ_USE_ARM_NEON=1"
-        "-DSZ_USE_ARM_SVE=1"
         PARENT_SCOPE)
 
+TARGET_ARCHITECTURE(ARCH)
+IF ("${ARCH}" STREQUAL "x86_64")
+    LIST(APPEND SZ_DEFINITIONS "-DSZ_USE_X86_AVX512=1")
+    LIST(APPEND SZ_DEFINITIONS "-DSZ_USE_X86_AVX2=1")
+    LIST(APPEND SZ_DEFINITIONS "-DSZ_USE_MISALIGNED_LOADS=1")
+ENDIF ()
+IF ("${ARCH}" STREQUAL "arm64")
+    LIST(APPEND SZ_DEFINITIONS "-DSZ_USE_ARM_NEON=1")
+    LIST(APPEND SZ_DEFINITIONS "-DSZ_USE_ARM_SVE=1")
+    LIST(APPEND SZ_DEFINITIONS "-DSZ_USE_MISALIGNED_LOADS=1")
+ENDIF ()
+
 FOREACH (DEFINITION ${SZ_DEFINITIONS})
     ADD_DEFINITIONS(${DEFINITION})
 ENDFOREACH ()
index 7d1ecf3376cab56e06f3b267414a00148e0a02b0..701b2b25ccb439c505547dafcfe265722a10dd65 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright 2023 Vsevolod Stakhov
+ * Copyright 2024 Vsevolod Stakhov
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
 #include "config.h"
 
 #ifdef WITH_HYPERSCAN
-#include <string>
 #include <filesystem>
 #include "contrib/ankerl/unordered_dense.h"
 #include "contrib/ankerl/svector.h"
 #include "fmt/core.h"
+#include "libutil/cxx/string.hxx"
 #include "libutil/cxx/file_util.hxx"
 #include "libutil/cxx/error.hxx"
 #include "hs.h"
@@ -91,9 +91,9 @@ namespace rspamd::util {
 class hs_known_files_cache {
 private:
        // These fields are filled when we add new known cache files
-       ankerl::svector<std::string, 4> cache_dirs;
-       ankerl::svector<std::string, 8> cache_extensions;
-       ankerl::unordered_dense::set<std::string> known_cached_files;
+       ankerl::svector<sz::string, 4> cache_dirs;
+       ankerl::svector<sz::string, 8> cache_extensions;
+       ankerl::unordered_dense::set<sz::string> known_cached_files;
        bool loaded = false;
 
 private:
@@ -132,16 +132,16 @@ public:
                        return;
                }
 
-               auto dir = fpath.parent_path();
-               auto ext = fpath.extension();
+               auto dir = sz::string{fpath.parent_path().c_str()};
+               auto ext = sz::string{fpath.extension().c_str()};
 
                if (std::find_if(cache_dirs.begin(), cache_dirs.end(),
                                                 [&](const auto &item) { return item == dir; }) == std::end(cache_dirs)) {
-                       cache_dirs.emplace_back(std::string{dir});
+                       cache_dirs.emplace_back(sz::string{dir});
                }
                if (std::find_if(cache_extensions.begin(), cache_extensions.end(),
                                                 [&](const auto &item) { return item == ext; }) == std::end(cache_extensions)) {
-                       cache_extensions.emplace_back(std::string{ext});
+                       cache_extensions.emplace_back(sz::string{ext});
                }
 
                auto is_known = known_cached_files.insert(fpath.string());
@@ -162,16 +162,16 @@ public:
                        return;
                }
 
-               auto dir = fpath.parent_path();
-               auto ext = fpath.extension();
+               auto dir = sz::string{fpath.parent_path().c_str()};
+               auto ext = sz::string{fpath.extension().c_str()};
 
                if (std::find_if(cache_dirs.begin(), cache_dirs.end(),
                                                 [&](const auto &item) { return item == dir; }) == std::end(cache_dirs)) {
-                       cache_dirs.emplace_back(dir.string());
+                       cache_dirs.emplace_back(dir);
                }
                if (std::find_if(cache_extensions.begin(), cache_extensions.end(),
                                                 [&](const auto &item) { return item == ext; }) == std::end(cache_extensions)) {
-                       cache_extensions.emplace_back(ext.string());
+                       cache_extensions.emplace_back(ext);
                }
 
                auto is_known = known_cached_files.insert(fpath.string());
@@ -215,7 +215,7 @@ public:
                /* We clean dir merely if we are running from the main process */
                if (rspamd_current_worker == nullptr && env_cleanup_disable == nullptr && loaded) {
                        const auto *log_func = RSPAMD_LOG_FUNC;
-                       auto cleanup_dir = [&](std::string_view dir) -> void {
+                       auto cleanup_dir = [&](sz::string_view dir) -> void {
                                for (const auto &ext: cache_extensions) {
                                        glob_t globbuf;
 
@@ -227,7 +227,7 @@ public:
 
                                        if (glob(glob_pattern.c_str(), 0, nullptr, &globbuf) == 0) {
                                                for (auto i = 0; i < globbuf.gl_pathc; i++) {
-                                                       auto path = std::string{globbuf.gl_pathv[i]};
+                                                       auto path = sz::string{globbuf.gl_pathv[i]};
                                                        std::size_t nsz;
                                                        struct stat st;
 
@@ -287,7 +287,7 @@ public:
 struct hs_shared_database {
        hs_database_t *db = nullptr; /**< internal database (might be in a shared memory) */
        std::optional<raii_mmaped_file> maybe_map;
-       std::string cached_path;
+       sz::string cached_path;
 
        ~hs_shared_database()
        {
@@ -335,7 +335,7 @@ struct real_hs_db {
        std::uint32_t crc32;
 };
 static auto
-hs_is_valid_database(void *raw, std::size_t len, std::string_view fname) -> tl::expected<bool, std::string>
+hs_is_valid_database(void *raw, std::size_t len, sz::string_view fname) -> tl::expected<bool, sz::string>
 {
        if (len < sizeof(real_hs_db)) {
                return tl::make_unexpected(fmt::format("cannot load hyperscan database from {}: too short", fname));
@@ -416,7 +416,7 @@ auto load_cached_hs_file(const char *fname, std::int64_t offset = 0) -> tl::expe
                                                                                         else {
                                                                                                 auto &tmpfile_checked = tmpfile.value();
                                                                                                 // Store owned string
-                                                                                                auto tmpfile_name = std::string{tmpfile_checked.get_name()};
+                                                                                                auto tmpfile_name = sz::string{tmpfile_checked.get_name()};
                                                                                                 std::size_t unserialized_size;
 
                                                                                                 if (auto ret = hs_serialized_database_size(((const char *) cached_serialized.get_map()) + offset,
index 4689d4276a3ef4753066996d3240ae54a1ff3637..642d9680c3e924896d1bcb1c8cf032eab2cdde74 100644 (file)
@@ -19,8 +19,7 @@
 #pragma once
 
 #include "config.h"
-#include <string>
-#include <string_view>
+#include "libutil/cxx/string.hxx"
 #include <cstdint>
 #include <optional>
 
@@ -54,7 +53,7 @@ public:
         * @param code
         * @param category
         */
-       error(std::string &&msg, int code, error_category category = error_category::INFORMAL)
+       error(sz::string &&msg, int code, error_category category = error_category::INFORMAL)
                : error_code(code), category(category)
        {
                static_storage = std::move(msg);
@@ -66,7 +65,7 @@ public:
         * @param code
         * @param category
         */
-       error(const std::string &msg, int code, error_category category = error_category::INFORMAL)
+       error(const sz::string &msg, int code, error_category category = error_category::INFORMAL)
                : error_code(code), category(category)
        {
                static_storage = msg;
@@ -148,12 +147,12 @@ public:
        }
 
 public:
-       std::string_view error_message;
+       sz::string_view error_message;
        int error_code;
        error_category category;
 
 private:
-       std::optional<std::string> static_storage;
+       std::optional<sz::string> static_storage;
 };
 
 }// namespace rspamd::util
index 45289053e82656447a3959986ea2f0d4b576fb3d..8c2b45ff47094f28fee463dc65f17e4c67f75d2f 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright 2023 Vsevolod Stakhov
+ * Copyright 2024 Vsevolod Stakhov
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -20,7 +20,7 @@
 #include "config.h"
 #include "contrib/expected/expected.hpp"
 #include "libutil/cxx/error.hxx"
-#include <string>
+#include "libutil/cxx/string.hxx"
 #include <sys/stat.h>
 
 namespace rspamd::util {
@@ -33,12 +33,12 @@ public:
        virtual ~raii_file() noexcept;
 
        static auto open(const char *fname, int flags) -> tl::expected<raii_file, error>;
-       static auto open(const std::string &fname, int flags) -> tl::expected<raii_file, error>
+       static auto open(const sz::string &fname, int flags) -> tl::expected<raii_file, error>
        {
                return open(fname.c_str(), flags);
        };
        static auto create(const char *fname, int flags, int perms) -> tl::expected<raii_file, error>;
-       static auto create(const std::string &fname, int flags, int perms) -> tl::expected<raii_file, error>
+       static auto create(const sz::string &fname, int flags, int perms) -> tl::expected<raii_file, error>
        {
                return create(fname.c_str(), flags, perms);
        };