aboutsummaryrefslogtreecommitdiffstats
path: root/src/libutil
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@rspamd.com>2024-11-29 11:31:35 +0000
committerVsevolod Stakhov <vsevolod@rspamd.com>2024-11-29 11:31:35 +0000
commitb39a9f52ed3f33082f13f51678d053ee80a2e1f4 (patch)
tree2144a18d85681df09f83e255f2e5c6d04e61e878 /src/libutil
parent6c0223b32b8fcb6621fa64197214abb400a09f52 (diff)
downloadrspamd-b39a9f52ed3f33082f13f51678d053ee80a2e1f4.tar.gz
rspamd-b39a9f52ed3f33082f13f51678d053ee80a2e1f4.zip
[Rework] Replace fastutf with simdutf
Simdutf is faster and supports far more architectures (especially non-x86 ones). Hence, it is a good idea to use it instead of the unsupported fastutf8 code.
Diffstat (limited to 'src/libutil')
-rw-r--r--src/libutil/CMakeLists.txt1
-rw-r--r--src/libutil/cxx/rspamd-simdutf.cxx41
-rw-r--r--src/libutil/fstring.c2
-rw-r--r--src/libutil/regexp.c2
-rw-r--r--src/libutil/rspamd_simdutf.h34
-rw-r--r--src/libutil/str_util.c2
6 files changed, 79 insertions, 3 deletions
diff --git a/src/libutil/CMakeLists.txt b/src/libutil/CMakeLists.txt
index 67b7e948f..acf082708 100644
--- a/src/libutil/CMakeLists.txt
+++ b/src/libutil/CMakeLists.txt
@@ -18,6 +18,7 @@ SET(LIBRSPAMDUTILSRC
${CMAKE_CURRENT_SOURCE_DIR}/heap.c
${CMAKE_CURRENT_SOURCE_DIR}/multipattern.c
${CMAKE_CURRENT_SOURCE_DIR}/cxx/utf8_util.cxx
+ ${CMAKE_CURRENT_SOURCE_DIR}/cxx/rspamd-simdutf.cxx
${CMAKE_CURRENT_SOURCE_DIR}/cxx/util_tests.cxx
${CMAKE_CURRENT_SOURCE_DIR}/cxx/file_util.cxx)
# Rspamdutil
diff --git a/src/libutil/cxx/rspamd-simdutf.cxx b/src/libutil/cxx/rspamd-simdutf.cxx
new file mode 100644
index 000000000..67b585812
--- /dev/null
+++ b/src/libutil/cxx/rspamd-simdutf.cxx
@@ -0,0 +1,41 @@
+/*
+ * Copyright 2024 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * A thin C-linkage wrapper around the simdutf library that keeps the old rspamd_fast_utf8_* functions working
+ */
+
+#include "config.h"
+#include "simdutf.h"
+
+extern "C" {
+
+void rspamd_fast_utf8_library_init(unsigned flags)
+{
+ // simdutf requires no explicit initialisation, so this is a no-op and `flags` is ignored
+}
+
+off_t rspamd_fast_utf8_validate(const unsigned char *data, size_t len)
+{
+ auto res = simdutf::validate_utf8_with_errors((const char *) data, len);
+
+ if (res.error == simdutf::error_code::SUCCESS) {
+ return 0;
+ }
+
+ return res.count + 1;// 0 is reserved for success, so the position of the first invalid character is reported 1-based
+}
+} \ No newline at end of file
diff --git a/src/libutil/fstring.c b/src/libutil/fstring.c
index 082620c27..8da6b0068 100644
--- a/src/libutil/fstring.c
+++ b/src/libutil/fstring.c
@@ -15,7 +15,7 @@
*/
#include "fstring.h"
#include "str_util.h"
-#include "contrib/fastutf8/fastutf8.h"
+#include "rspamd_simdutf.h"
#include "contrib/mumhash/mum.h"
diff --git a/src/libutil/regexp.c b/src/libutil/regexp.c
index 9e98699fe..0646285ae 100644
--- a/src/libutil/regexp.c
+++ b/src/libutil/regexp.c
@@ -19,7 +19,7 @@
#include "ref.h"
#include "util.h"
#include "rspamd.h"
-#include "contrib/fastutf8/fastutf8.h"
+#include "rspamd_simdutf.h"
#ifndef WITH_PCRE2
/* Normal pcre path */
diff --git a/src/libutil/rspamd_simdutf.h b/src/libutil/rspamd_simdutf.h
new file mode 100644
index 000000000..c1fa07892
--- /dev/null
+++ b/src/libutil/rspamd_simdutf.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2024 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef RSPAMD_RSPAMD_SIMDUTF_H
+#define RSPAMD_RSPAMD_SIMDUTF_H
+
+#pragma once
+#include "config.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void rspamd_fast_utf8_library_init(unsigned flags);
+off_t rspamd_fast_utf8_validate(const unsigned char *data, size_t len);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif//RSPAMD_RSPAMD_SIMDUTF_H
diff --git a/src/libutil/str_util.c b/src/libutil/str_util.c
index f8fff0dca..b3e47b7d4 100644
--- a/src/libutil/str_util.c
+++ b/src/libutil/str_util.c
@@ -31,7 +31,7 @@
#include <immintrin.h>
#endif
-#include "contrib/fastutf8/fastutf8.h"
+#include "rspamd_simdutf.h"
const unsigned char lc_map[256] = {
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,