source.dussan.org Git - rspamd.git/commitdiff
[Fix] Fix format string and some length issues
author Vsevolod Stakhov <vsevolod@rspamd.com>
Tue, 26 Sep 2023 13:29:30 +0000 (14:29 +0100)
committer Vsevolod Stakhov <vsevolod@rspamd.com>
Tue, 26 Sep 2023 13:29:30 +0000 (14:29 +0100)
src/libstat/tokenizers/tokenizers.c
src/libstat/tokenizers/tokenizers.h

index 6e55a33a65406350d5f7578165eed1a3500b4642..ee7234df774591f8bb813d67ade939cab195d0ee 100644 (file)
@@ -1,11 +1,11 @@
-/*-
- * Copyright 2016 Vsevolod Stakhov
+/*
+ * Copyright 2023 Vsevolod Stakhov
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
- *   http://www.apache.org/licenses/LICENSE-2.0
+ *    http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
@@ -871,7 +871,7 @@ void rspamd_normalize_words(GArray *words, rspamd_mempool_t *pool)
 
 void rspamd_stem_words(GArray *words, rspamd_mempool_t *pool,
                                           const gchar *language,
-                                          struct rspamd_lang_detector *d)
+                                          struct rspamd_lang_detector *lang_detector)
 {
        static GHashTable *stemmers = NULL;
        struct sb_stemmer *stem = NULL;
@@ -894,7 +894,7 @@ void rspamd_stem_words(GArray *words, rspamd_mempool_t *pool,
 
                        if (stem == NULL) {
                                msg_debug_pool(
-                                       "<%s> cannot create lemmatizer for %s language",
+                                       "cannot create lemmatizer for %s language",
                                        language);
                                g_hash_table_insert(stemmers, g_strdup(language),
                                                                        GINT_TO_POINTER(-1));
@@ -919,12 +919,11 @@ void rspamd_stem_words(GArray *words, rspamd_mempool_t *pool,
                                stemmed = sb_stemmer_stem(stem,
                                                                                  tok->normalized.begin, tok->normalized.len);
 
-                               dlen = stemmed ? strlen(stemmed) : 0;
+                               dlen = sb_stemmer_length(stem);
 
-                               if (dlen > 0) {
-                                       dest = rspamd_mempool_alloc(pool, dlen + 1);
+                               if (stemmed != NULL && dlen > 0) {
+                                       dest = rspamd_mempool_alloc(pool, dlen);
                                        memcpy(dest, stemmed, dlen);
-                                       dest[dlen] = '\0';
                                        tok->stemmed.len = dlen;
                                        tok->stemmed.begin = dest;
                                        tok->flags |= RSPAMD_STAT_TOKEN_FLAG_STEMMED;
@@ -940,8 +939,8 @@ void rspamd_stem_words(GArray *words, rspamd_mempool_t *pool,
                                tok->stemmed.begin = tok->normalized.begin;
                        }
 
-                       if (tok->stemmed.len > 0 && d != NULL &&
-                               rspamd_language_detector_is_stop_word(d, tok->stemmed.begin, tok->stemmed.len)) {
+                       if (tok->stemmed.len > 0 && lang_detector != NULL &&
+                               rspamd_language_detector_is_stop_word(lang_detector, tok->stemmed.begin, tok->stemmed.len)) {
                                tok->flags |= RSPAMD_STAT_TOKEN_FLAG_STOP_WORD;
                        }
                }
index e908c359d247710eb03c717dd919d88a2e8f9132..d696364e2a834e780b27c602db7f2702417cbb3f 100644 (file)
@@ -1,3 +1,19 @@
+/*
+ * Copyright 2023 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 #ifndef TOKENIZERS_H
 #define TOKENIZERS_H
 
@@ -73,7 +89,7 @@ void rspamd_normalize_words(GArray *words, rspamd_mempool_t *pool);
 
 void rspamd_stem_words(GArray *words, rspamd_mempool_t *pool,
                                           const gchar *language,
-                                          struct rspamd_lang_detector *d);
+                                          struct rspamd_lang_detector *lang_detector);
 
 void rspamd_tokenize_meta_words(struct rspamd_task *task);