summaryrefslogtreecommitdiffstats
path: root/test/lua/unit/tokenizer.lua
blob: de15a6bd7d6e565ea6ca02cce38e2cce356ab879 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
-- Unit test for util.tokenize_text: feeds sample strings to the tokenizer and
-- compares the produced tokens with a hand-written expected list.
context("Text tokenization test", function()
  local util = require "rspamd_util"
  local logger = require "rspamd_logger"

  test("Tokenize simple text", function()
    -- Each case is a pair: { input text, expected tokens in order }.
    local cases = {
      {"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Integer mattis, nibh",
        {"Lorem", "ipsum", "dolor", "sit", "amet", "consectetur", "adipiscing", "elit",
        "Integer", "mattis", "nibh"
        }
      },
    }

    for _, c in ipairs(cases) do
      local text, expected = c[1], c[2]
      local w = util.tokenize_text(text)
      assert_not_nil(w, "cannot tokenize " .. text)

      -- Check the token count up front: iterating only over the tokenizer
      -- output would let a truncated (or empty) result pass unnoticed.
      -- NOTE(review): assumes `w` is a plain sequence table, so `#w` is the
      -- number of tokens -- confirm against the tokenize_text binding.
      assert_equal(#w, #expected, "unexpected number of tokens for " .. text)

      -- Walk the expected list so that every expected word is verified.
      for i, expected_word in ipairs(expected) do
        logger.infox('%1:%2', i, w[i])
        assert_equal(w[i], expected_word)
      end
    end
  end)
end)