diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2018-08-24 10:19:55 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2018-08-24 10:19:55 +0100 |
commit | 2c5620474d0d07fbe5b4b7418c59b6cd27611a0e (patch) | |
tree | 29b20a7d3aa44a5cb89e7c7dc21b8941fd14f007 /test | |
parent | 6667005f12ce6ebd50ee9ef64a4d7af3ccb19199 (diff) | |
download | rspamd-2c5620474d0d07fbe5b4b7418c59b6cd27611a0e.tar.gz rspamd-2c5620474d0d07fbe5b4b7418c59b6cd27611a0e.zip |
[Test] Remove legacy tokenizer tests
Diffstat (limited to 'test')
-rw-r--r-- | test/lua/unit/tokenizer.lua | 34 |
1 files changed, 0 insertions, 34 deletions
```diff
diff --git a/test/lua/unit/tokenizer.lua b/test/lua/unit/tokenizer.lua
index 16f8f1846..fbf7ee3e7 100644
--- a/test/lua/unit/tokenizer.lua
+++ b/test/lua/unit/tokenizer.lua
@@ -33,40 +33,6 @@ context("Text tokenization test", function()
 end)
 end
-
- cases = {
- -- First token is bad
- {"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Integer mattis, nibh",
- {"orem", "ipsum", "dolor", "sit", "amet", "consectetur", "adipiscing", "elit",
- "Integer", "mattis", "nibh"
- }
- },
- -- Unicode is broken
- --{"Հետաքրքրվողների համար ոտորև ներկայացված",
- -- {"Հետաքրքրվողների", "համար", "ոտորև", "ներկայացված"}
- --},
- {"", {}},
- {",,,,,", {}},
- {"word,,,,,word ", {"ord", "word"}},
- {"word", {"ord"}},
- {",,,,word,,,", {"word"}}
- }
-
- for i,c in ipairs(cases) do
- test("Tokenize simple text (legacy) " .. i, function()
- local w = util.tokenize_text(c[1], {}, true)
- if #c[2] == 0 then
- assert_equal(#w, 0, "must not have tokens " .. c[1])
- else
- assert_not_nil(w, "must tokenize " .. c[1])
-
- for i,wrd in ipairs(w) do
- assert_equal(wrd, c[2][i])
- end
- end
- end)
- end
-
 cases = {
 {"word https://example.com/path word",
 {{5, 24}},
```