You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

rspamd.classifiers.lua 3.6KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133
  1. --[[
  2. Copyright (c) 2011-2015, Vsevolod Stakhov <vsevolod@highsecure.ru>
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ]]--
  -- Selects the statfiles appropriate for a message, based on properties of
  -- the message and on the detected language of its text.

  -- Labels identifying the statfiles dedicated to particular kinds of messages.
  local many_recipients_label = "many recipients"
  local undisclosed_recipients_label = "undisclosed recipients"
  local list_label = "maillist"
  local long_subject_label = "long subject"
  --- Collect the labelled statfiles whose rule matches the given message.
  -- Four rules are checked, each tied to one statfile label:
  --   * many recipients: the recipient list has more than 5 entries;
  --   * undisclosed recipients: the recipient list exists but is empty;
  --   * maillist: the message has a List-Unsubscribe header;
  --   * long subject: the raw Subject header is longer than 150 bytes.
  -- @param classifier object providing `get_statfile_by_label(label)`
  -- @param task object providing `get_recipients(kind)` and `get_header_raw(name)`
  -- @return the list of collected statfiles when it holds more than one
  --         entry, otherwise nil
  local function get_specific_statfiles(classifier, task)
    -- table.foreach does not exist in newer Lua versions. The shim is kept
    -- installed because other code in this file still calls table.foreach.
    if not table.foreach then
      table.foreach = function(t, f)
        for k, v in pairs(t) do f(k, v) end
      end
    end

    local spec_st = {}

    -- Append every value of `statfiles` to the result list.
    local function add_all(statfiles)
      for _, v in pairs(statfiles) do
        table.insert(spec_st, v)
      end
    end

    -- More than 5 recipients
    local st_many = classifier:get_statfile_by_label(many_recipients_label)
    if st_many then
      -- `rcpt` must be local: a bare assignment would create a global that
      -- leaks out of this function.
      -- NOTE(review): the meaning of the argument 2 is defined by the task
      -- API and is not visible here - confirm.
      local rcpt = task:get_recipients(2)
      if rcpt and #rcpt > 5 then
        add_all(st_many)
      end
    end

    -- Undisclosed recipients: the list is present but empty
    local st_undisc = classifier:get_statfile_by_label(undisclosed_recipients_label)
    if st_undisc then
      local rcpt = task:get_recipients(2)
      if rcpt and #rcpt == 0 then
        add_all(st_undisc)
      end
    end

    -- Mailing list
    local st_maillist = classifier:get_statfile_by_label(list_label)
    if st_maillist then
      local unsub_header = task:get_header_raw('List-Unsubscribe')
      if unsub_header then
        add_all(st_maillist)
      end
    end

    -- Long subject
    local st_longsubj = classifier:get_statfile_by_label(long_subject_label)
    if st_longsubj then
      local subj = task:get_header_raw('Subject')
      if subj and string.len(subj) > 150 then
        add_all(st_longsubj)
      end
    end

    -- A single statfile is not returned; the caller then falls back to its
    -- other selection rules.
    if #spec_st > 1 then
      return spec_st
    end
    return nil
  end
  --- Statfile selector registered for the 'bayes' classifier.
  -- Selection order:
  --   1. statfiles matched by message rules (get_specific_statfiles); when
  --      learning, these are returned immediately;
  --   2. unlabelled statfiles whose 'language' parameter equals the detected
  --      message language;
  --   3. unlabelled statfiles that have no 'language' parameter.
  -- After steps 2 and 3 the accumulated list is returned only if it holds
  -- more than one statfile.
  -- @param classifier object providing `get_statfiles()` and `get_statfile_by_label(label)`
  -- @param task the message being processed
  -- @param is_learn when true, rule-matched statfiles are returned directly
  -- @param is_spam not used by this selector
  -- @return list of selected statfiles, or nil when fewer than two were found
  classifiers['bayes'] = function(classifier, task, is_learn, is_spam)
    -- Return the language of the first text part that reports one, or nil.
    local function detect_language(task)
      local parts = task:get_text_parts()
      -- Guard: do not hand nil to ipairs if no parts list is returned.
      if not parts then
        return nil
      end
      for _, p in ipairs(parts) do
        local l = p:get_language()
        if l then
          return l
        end
      end
      return nil
    end

    -- Main procedure
    local selected = {}
    local spec_st = get_specific_statfiles(classifier, task)
    if spec_st then
      if is_learn then
        return spec_st
      end
      -- Merge the rule-matched statfiles into the selection. A plain loop is
      -- used so this does not depend on table.foreach being available.
      for _, v in pairs(spec_st) do
        table.insert(selected, v)
      end
    end

    -- Detect statfiles by language. `language` is local so that it does not
    -- leak into the global environment.
    local language = detect_language(task)
    if language then
      -- Find statfiles with the detected language
      for _, st in ipairs(classifier:get_statfiles()) do
        -- Skip labelled statfiles
        if not st:get_label() then
          local st_l = st:get_param('language')
          if st_l and st_l == language then
            table.insert(selected, st)
          end
        end
      end
      if #selected > 1 then
        return selected
      end
    end

    -- Language not detected, or too few language-specific statfiles found:
    -- add the unlabelled statfiles that have no language set.
    for _, st in ipairs(classifier:get_statfiles()) do
      -- Skip labelled statfiles
      if not st:get_label() then
        local st_l = st:get_param('language')
        if not st_l then
          table.insert(selected, st)
        end
      end
    end
    if #selected > 1 then
      return selected
    end

    return nil
  end