summary refs log tree commit diff stats
path: root/test
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@highsecure.ru> 2018-08-24 10:19:55 +0100
committer Vsevolod Stakhov <vsevolod@highsecure.ru> 2018-08-24 10:19:55 +0100
commit 2c5620474d0d07fbe5b4b7418c59b6cd27611a0e (patch)
tree 29b20a7d3aa44a5cb89e7c7dc21b8941fd14f007 /test
parent 6667005f12ce6ebd50ee9ef64a4d7af3ccb19199 (diff)
download rspamd-2c5620474d0d07fbe5b4b7418c59b6cd27611a0e.tar.gz
rspamd-2c5620474d0d07fbe5b4b7418c59b6cd27611a0e.zip
[Test] Remove legacy tokenizer tests
Diffstat (limited to 'test')
-rw-r--r-- test/lua/unit/tokenizer.lua 34
1 files changed, 0 insertions, 34 deletions
diff --git a/test/lua/unit/tokenizer.lua b/test/lua/unit/tokenizer.lua
index 16f8f1846..fbf7ee3e7 100644
--- a/test/lua/unit/tokenizer.lua
+++ b/test/lua/unit/tokenizer.lua
@@ -33,40 +33,6 @@ context("Text tokenization test", function()
end)
end
-
- cases = {
- -- First token is bad
- {"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Integer mattis, nibh",
- {"orem", "ipsum", "dolor", "sit", "amet", "consectetur", "adipiscing", "elit",
- "Integer", "mattis", "nibh"
- }
- },
- -- Unicode is broken
- --{"Հետաքրքրվողների համար ոտորև ներկայացված",
- -- {"Հետաքրքրվողների", "համար", "ոտորև", "ներկայացված"}
- --},
- {"", {}},
- {",,,,,", {}},
- {"word,,,,,word ", {"ord", "word"}},
- {"word", {"ord"}},
- {",,,,word,,,", {"word"}}
- }
-
- for i,c in ipairs(cases) do
- test("Tokenize simple text (legacy) " .. i, function()
- local w = util.tokenize_text(c[1], {}, true)
- if #c[2] == 0 then
- assert_equal(#w, 0, "must not have tokens " .. c[1])
- else
- assert_not_nil(w, "must tokenize " .. c[1])
-
- for i,wrd in ipairs(w) do
- assert_equal(wrd, c[2][i])
- end
- end
- end)
- end
-
cases = {
{"word https://example.com/path word",
{{5, 24}},