aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/snowball
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@highsecure.ru> 2015-12-31 17:43:49 +0000
committer Vsevolod Stakhov <vsevolod@highsecure.ru> 2015-12-31 17:43:49 +0000
commit 9f23c1e7a30892d1ba670b512112a218ee50b806 (patch)
tree fc6357ca9bbba046fd105aaba82f223a1dc26c4e /contrib/snowball
parent 2375dba898b481837879940dfdcf3ea85248fe01 (diff)
download rspamd-9f23c1e7a30892d1ba670b512112a218ee50b806.tar.gz
rspamd-9f23c1e7a30892d1ba670b512112a218ee50b806.zip
Fix snowball distribution.
Diffstat (limited to 'contrib/snowball')
-rw-r--r-- contrib/snowball/CMakeLists.txt 7
-rw-r--r-- contrib/snowball/libstemmer/libstemmer_c.in 95
2 files changed, 95 insertions, 7 deletions
diff --git a/contrib/snowball/CMakeLists.txt b/contrib/snowball/CMakeLists.txt
index 1dee79490..8674e2d88 100644
--- a/contrib/snowball/CMakeLists.txt
+++ b/contrib/snowball/CMakeLists.txt
@@ -1,10 +1,3 @@
-PROJECT(snowball C)
-
-cmake_minimum_required(VERSION 2.8)
-
-INCLUDE(CheckCCompilerFlag)
-INCLUDE(FindPerl)
-
# End of configuration
SET(LIBSTEM_ALGORITHMS danish dutch english finnish french german hungarian
italian norwegian porter portuguese romanian
diff --git a/contrib/snowball/libstemmer/libstemmer_c.in b/contrib/snowball/libstemmer/libstemmer_c.in
new file mode 100644
index 000000000..4de579874
--- /dev/null
+++ b/contrib/snowball/libstemmer/libstemmer_c.in
@@ -0,0 +1,95 @@
+
+#include <stdlib.h>
+#include <string.h>
+#include "../include/libstemmer.h"
+#include "../runtime/api.h"
+#include "@MODULES_H@"
+
+/*
+ * Handle for a single stemmer instance: the three entry points copied from
+ * the selected module, plus the environment they operate on.
+ */
+struct sb_stemmer {
+    struct SN_env *(*create)(void);  /* called once by sb_stemmer_new() to obtain env */
+    void (*close)(struct SN_env *);  /* called on env by sb_stemmer_delete() */
+    int (*stem)(struct SN_env *);    /* called on env by sb_stemmer_stem() */
+
+    struct SN_env *env;              /* result of create() */
+};
+
+/*
+ * Return the algorithm_names table.
+ * NOTE(review): the table is not defined in this file; presumably it comes
+ * from the generated modules header -- confirm.
+ */
+extern const char **
+sb_stemmer_list(void)
+{
+    const char ** names = algorithm_names;
+
+    return names;
+}
+
+/*
+ * Map a character-encoding name to its stemmer_encoding_t value.
+ *
+ * A NULL name selects ENC_UTF_8.  Otherwise the encodings table is scanned
+ * up to its terminating entry (name == NULL) for an exact strcmp() match;
+ * ENC_UNKNOWN is returned when no entry matches.
+ */
+static stemmer_encoding_t
+sb_getenc(const char * charenc)
+{
+    struct stemmer_encoding * entry;
+
+    if (charenc == NULL) return ENC_UTF_8;
+
+    entry = encodings;
+    while (entry->name != NULL) {
+        if (strcmp(entry->name, charenc) == 0) return entry->enc;
+        entry++;
+    }
+    return ENC_UNKNOWN;
+}
+
+/*
+ * Create a stemmer for the given algorithm name and character encoding.
+ *
+ * algorithm - name compared exactly (strcmp) against the modules table.
+ * charenc   - encoding name; NULL selects ENC_UTF_8 (see sb_getenc).
+ *
+ * Returns a newly allocated stemmer, to be released with
+ * sb_stemmer_delete(), or NULL when algorithm is NULL, the encoding is
+ * unknown, no module matches the algorithm/encoding pair, the allocation
+ * fails, or the module's create() returns NULL.
+ */
+extern struct sb_stemmer *
+sb_stemmer_new(const char * algorithm, const char * charenc)
+{
+    stemmer_encoding_t enc;
+    struct stemmer_modules * module;
+    struct sb_stemmer * stemmer;
+
+    /* strcmp() on a null pointer is undefined behaviour; reject it up front. */
+    if (algorithm == NULL) return NULL;
+
+    enc = sb_getenc(charenc);
+    if (enc == ENC_UNKNOWN) return NULL;
+
+    /* The modules table ends with an entry whose name is NULL. */
+    for (module = modules; module->name != NULL; module++) {
+        if (strcmp(module->name, algorithm) == 0 && module->enc == enc) break;
+    }
+    if (module->name == NULL) return NULL;
+
+    stemmer = malloc(sizeof *stemmer);
+    if (stemmer == NULL) return NULL;
+
+    stemmer->create = module->create;
+    stemmer->close = module->close;
+    stemmer->stem = module->stem;
+
+    stemmer->env = stemmer->create();
+    if (stemmer->env == NULL)
+    {
+        /* Let sb_stemmer_delete() tear down the partially built handle. */
+        sb_stemmer_delete(stemmer);
+        return NULL;
+    }
+
+    return stemmer;
+}
+
+/*
+ * Destroy a stemmer created by sb_stemmer_new().
+ *
+ * A NULL stemmer is ignored.  When a close callback is set it is invoked on
+ * the stemmer's environment; the handle itself is freed in every case.
+ */
+void
+sb_stemmer_delete(struct sb_stemmer * stemmer)
+{
+    if (stemmer == NULL) return;
+    if (stemmer->close != NULL) {
+        stemmer->close(stemmer->env);
+        stemmer->close = NULL;
+    }
+    /* Free even when no close callback is set, so the handle never leaks. */
+    free(stemmer);
+}
+
+/*
+ * Stem `size` symbols of `word` using the stemmer's environment.
+ *
+ * The word is handed to SN_set_current(); if that reports failure the
+ * environment's l field is reset to 0 and NULL is returned.  NULL is also
+ * returned when the stem callback yields a negative value.  Otherwise a 0
+ * is stored at p[l] and the environment's p buffer is returned.
+ */
+const sb_symbol *
+sb_stemmer_stem(struct sb_stemmer * stemmer, const sb_symbol * word, int size)
+{
+    struct SN_env * env = stemmer->env;
+    int status;
+
+    if (SN_set_current(env, size, (const symbol *)(word)) != 0)
+    {
+        env->l = 0;
+        return NULL;
+    }
+
+    status = stemmer->stem(env);
+    if (status < 0) return NULL;
+
+    /* Terminate the result in place before handing the buffer out. */
+    env->p[env->l] = 0;
+    return (const sb_symbol *)(env->p);
+}
+
+/*
+ * Return the l field of the stemmer's environment, i.e. the index at which
+ * sb_stemmer_stem() writes the terminating 0 of its result.
+ */
+int
+sb_stemmer_length(struct sb_stemmer * stemmer)
+{
+    const struct SN_env * env = stemmer->env;
+
+    return env->l;
+}