aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@highsecure.ru> 2015-05-20 22:37:33 +0100
committer Vsevolod Stakhov <vsevolod@highsecure.ru> 2015-05-20 22:37:33 +0100
commit 8b724ff651736b296f96e35730989ac2710c66e1 (patch)
tree 3da3d44f05386cbff1aee8f6a0c4957ee4ae9a94
parent 255340bb9fbc5e13d3eae070762a5af5f8275fe3 (diff)
download rspamd-8b724ff651736b296f96e35730989ac2710c66e1.tar.gz
rspamd-8b724ff651736b296f96e35730989ac2710c66e1.zip
Add a simple unit test for tokenizer.
-rw-r--r-- test/lua/unit/tokenizer.lua 23
1 files changed, 23 insertions, 0 deletions
diff --git a/test/lua/unit/tokenizer.lua b/test/lua/unit/tokenizer.lua
new file mode 100644
index 000000000..de15a6bd7
--- /dev/null
+++ b/test/lua/unit/tokenizer.lua
@@ -0,0 +1,23 @@
+-- Unit test: util.tokenize_text must split text into the expected words, in order.
+context("Text tokenization test", function()
+  local util = require "rspamd_util"
+  local logger = require "rspamd_logger"
+  test("Tokenize simple text", function()
+    -- Each case is {input text, expected tokens in order}.
+    local cases = {
+      {"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Integer mattis, nibh",
+        {"Lorem", "ipsum", "dolor", "sit", "amet", "consectetur", "adipiscing", "elit",
+          "Integer", "mattis", "nibh"}},
+    }
+    for _, c in ipairs(cases) do
+      local w = util.tokenize_text(c[1])
+      assert_not_nil(w, "cannot tokenize " .. c[1])
+      -- The loop below only visits returned tokens, so a short result must fail here.
+      assert_equal(#w, #c[2], "unexpected token count for " .. c[1])
+      for i, wrd in ipairs(w) do
+        logger.infox('%1:%2', i, wrd)
+        assert_equal(wrd, c[2][i])
+      end
+    end
+  end)
+end)
\ No newline at end of file