From c5e769ec0ce3d7347276809d79d7dcb81aae8324 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Fri, 23 Oct 2015 11:51:53 +0100 Subject: [PATCH] Allow conditional build of snowball. --- CMakeLists.txt | 6 +++++- config.h.in | 1 + src/CMakeLists.txt | 4 +++- src/libmime/message.c | 16 ++++++++++++++-- src/libstat/tokenizers/osb.c | 1 - src/rspamadm/CMakeLists.txt | 5 ++++- test/CMakeLists.txt | 4 +++- 7 files changed, 30 insertions(+), 7 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index aa66b075a..6c4830408 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -45,6 +45,7 @@ OPTION(FORCE_GMIME24 "Link with gmime2.4 [default: OFF]" OPTION(INSTALL_EXAMPLES "Install examples [default: OFF]" OFF) OPTION(INSTALL_WEBUI "Install web interface [default: ON]" ON) OPTION(WANT_SYSTEMD_UNITS "Install systemd unit files on Linux [default: OFF]" OFF) +OPTION(ENABLE_SNOWBALL "Enable snowball stemmer [default: ON]" ON) # Build optimized code for following CPU (default i386) #SET(CPU_TUNE "i686") @@ -1014,7 +1015,10 @@ ADD_SUBDIRECTORY(contrib/xxhash) ADD_SUBDIRECTORY(contrib/cdb) ADD_SUBDIRECTORY(contrib/http-parser) ADD_SUBDIRECTORY(contrib/libottery) -ADD_SUBDIRECTORY(contrib/snowball) +IF(ENABLE_SNOWBALL MATCHES "ON") + ADD_SUBDIRECTORY(contrib/snowball) + SET(WITH_SNOWBALL 1) +ENDIF() ADD_SUBDIRECTORY(contrib/blake2) ADD_SUBDIRECTORY(contrib/libucl) ADD_SUBDIRECTORY(contrib/librdns) diff --git a/config.h.in b/config.h.in index f53f51978..3ecb964be 100644 --- a/config.h.in +++ b/config.h.in @@ -225,6 +225,7 @@ #cmakedefine HAVE_STROPS_H 1 #cmakedefine HAVE_SETSIG 1 #cmakedefine HAVE_OASYNC 1 +#cmakedefine WITH_SNOWBALL 1 /* Configure allocator */ #define uthash_malloc(sz) g_slice_alloc(sz) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 17ff6ebe5..b58067210 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -108,7 +108,9 @@ IF(NOT DEBIAN_BUILD) ENDIF(NOT DEBIAN_BUILD) TARGET_LINK_LIBRARIES(rspamd rspamd-server) -TARGET_LINK_LIBRARIES(rspamd stemmer) +IF (ENABLE_SNOWBALL MATCHES "ON") + TARGET_LINK_LIBRARIES(rspamd stemmer) +ENDIF() TARGET_LINK_LIBRARIES(rspamd rspamd-actrie) TARGET_LINK_LIBRARIES(rspamd ${RSPAMD_REQUIRED_LIBRARIES}) diff --git a/src/libmime/message.c b/src/libmime/message.c index 591107ba4..845f2f804 100644 --- a/src/libmime/message.c +++ b/src/libmime/message.c @@ -31,7 +31,11 @@ #include "images.h" #include "utlist.h" #include "tokenizers/tokenizers.h" + +#ifdef WITH_SNOWBALL #include "libstemmer.h" +#endif + #include "acism.h" #include @@ -947,13 +951,16 @@ static void rspamd_normalize_text_part (struct rspamd_task *task, struct mime_text_part *part) { +#ifdef WITH_SNOWBALL struct sb_stemmer *stem = NULL; +#endif rspamd_ftok_t *w; const guchar *r; gchar *temp_word; guint i, nlen; GArray *tmp; +#ifdef WITH_SNOWBALL if (part->language && part->language[0] != '\0' && IS_PART_UTF (part)) { stem = sb_stemmer_new (part->language, "UTF_8"); if (stem == NULL) { @@ -961,6 +968,7 @@ rspamd_normalize_text_part (struct rspamd_task *task, task->message_id, part->language); } } +#endif /* Ugly workaround */ tmp = rspamd_tokenize_text (part->content->data, @@ -971,12 +979,15 @@ rspamd_normalize_text_part (struct rspamd_task *task, if (tmp) { for (i = 0; i < tmp->len; i ++) { w = &g_array_index (tmp, rspamd_ftok_t, i); + r = NULL; +#ifdef WITH_SNOWBALL if (stem) { r = sb_stemmer_stem (stem, w->begin, w->len); } +#endif if (w->len > 0 && !(w->len == 6 && memcmp (w->begin, "!!EX!!", 6) == 0)) { - if (stem != NULL && r != NULL) { + if (r != NULL) { nlen = strlen (r); nlen = MIN (nlen, w->len); temp_word = rspamd_mempool_alloc (task->task_pool, nlen); @@ -1001,10 +1012,11 @@ rspamd_normalize_text_part (struct rspamd_task *task, } part->normalized_words = tmp; } - +#ifdef WITH_SNOWBALL if (stem != NULL) { sb_stemmer_delete (stem); } +#endif } #define MIN3(a, b, c) ((a) < (b) ? ((a) < (c) ? (a) : (c)) : ((b) < (c) ? (b) : (c))) diff --git a/src/libstat/tokenizers/osb.c b/src/libstat/tokenizers/osb.c index 7744e2883..20fc6ece8 100644 --- a/src/libstat/tokenizers/osb.c +++ b/src/libstat/tokenizers/osb.c @@ -28,7 +28,6 @@ #include "tokenizers.h" #include "stat_internal.h" -#include "libstemmer.h" #include "xxhash.h" #include "cryptobox.h" diff --git a/src/rspamadm/CMakeLists.txt b/src/rspamadm/CMakeLists.txt index 20cf3b3a0..7d693c388 100644 --- a/src/rspamadm/CMakeLists.txt +++ b/src/rspamadm/CMakeLists.txt @@ -11,7 +11,10 @@ SET(RSPAMADMSRC rspamadm.c commands.c pw.c keypair.c configtest.c ADD_EXECUTABLE(rspamadm ${RSPAMADMSRC}) TARGET_LINK_LIBRARIES(rspamadm rspamd-server) TARGET_LINK_LIBRARIES(rspamadm ${RSPAMD_REQUIRED_LIBRARIES}) -TARGET_LINK_LIBRARIES(rspamadm stemmer) + +IF (ENABLE_SNOWBALL MATCHES "ON") + TARGET_LINK_LIBRARIES(rspamadm stemmer) +ENDIF() TARGET_LINK_LIBRARIES(rspamadm rspamd-actrie) IF (NOT DEBIAN_BUILD) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 1e8fcb67f..241565b19 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -25,7 +25,9 @@ ENDIF(NOT CMAKE_SYSTEM_NAME STREQUAL "Darwin") TARGET_LINK_LIBRARIES(rspamd-test rspamd-cdb) TARGET_LINK_LIBRARIES(rspamd-test rspamd-http-parser) TARGET_LINK_LIBRARIES(rspamd-test ${RSPAMD_REQUIRED_LIBRARIES}) -TARGET_LINK_LIBRARIES(rspamd-test stemmer) +IF (ENABLE_SNOWBALL MATCHES "ON") + TARGET_LINK_LIBRARIES(rspamd-test stemmer) +ENDIF() TARGET_LINK_LIBRARIES(rspamd-test rspamd-actrie) ADD_CUSTOM_TARGET(rspamd-func-test COMMAND -- 2.39.5