You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1234567891011121314151617181920212223242526272829303132333435363738394041424344
  -- Redis-side Lua script that learns (or unlearns) a set of bayes tokens.
  --
  -- All arguments are passed through KEYS:
  --   KEYS[1] - prefix for bayes tokens (e.g. for per-user classification)
  --   KEYS[2] - is_spam flag; only the literal string 'true' selects spam
  --   KEYS[3] - symbol name; the prefix is registered in the set `<symbol>_keys`
  --   KEYS[4] - is_unlearn flag; only the literal string 'true' selects unlearning
  --   KEYS[5] - tokens, encoded as a messagepack array of strings
  --   KEYS[6] - optional text tokens, encoded as a messagepack array of strings
  --             (size must be twice the size of KEYS[5]: two entries per token)
  local stat_prefix = KEYS[1]
  local spam = (KEYS[2] == 'true')
  local stat_symbol = KEYS[3]
  local unlearn = (KEYS[4] == 'true')
  local tokens = cmsgpack.unpack(KEYS[5])

  local words
  if KEYS[6] then
    words = cmsgpack.unpack(KEYS[6])
  end

  -- Hash field names depend on the class being learned.
  local class_field, learns_field
  if spam then
    class_field, learns_field = 'S', 'learns_spam'
  else
    class_field, learns_field = 'H', 'learns_ham'
  end

  -- Every counter touched below moves by the same step: -1 on unlearn, +1 otherwise.
  local step = unlearn and -1 or 1

  redis.call('SADD', stat_symbol .. '_keys', stat_prefix)
  redis.call('HSET', stat_prefix, 'version', '2') -- new schema
  redis.call('HINCRBY', stat_prefix, learns_field, step) -- adjust the learned count

  for idx, tok in ipairs(tokens) do
    redis.call('HINCRBY', tok, class_field, step)

    -- Text tokens are laid out pairwise: entries 2*idx-1 and 2*idx belong to token idx.
    local first = words and words[idx * 2 - 1]
    if first then
      local second = words[idx * 2]
      local text = first
      if second then
        text = string.format('%s:%s', first, second)
      end

      redis.call('HSET', tok, 'tokens', text)
      -- The sorted set `<prefix>_z` is only updated for tokens that carry text.
      redis.call('ZINCRBY', stat_prefix .. '_z', step, tok)
    end
  end