You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

rspamd.classifiers.lua 3.7KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134
  1. --[[
  2. Copyright (c) 2011-2015, Vsevolod Stakhov <vsevolod@highsecure.ru>
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ]]--
  -- Detects the language of a message and selects the appropriate statfiles
  -- for it.

  -- Labels used to look up the special-purpose statfiles of a classifier.
  -- Each string is passed verbatim to classifier:get_statfile_by_label().
  local many_recipients_label = "many recipients"
  local undisclosed_recipients_label = "undisclosed recipients"
  local list_label = "maillist"
  local long_subject_label = "long subject"
  19. -- Get specific statfiles set based on message rules
  -- Append every value stored in `src` to the end of the array `dst`.
  local function append_statfiles(dst, src)
    for _, st in pairs(src) do
      dst[#dst + 1] = st
    end
  end

  --- Collect the label-specific statfiles that apply to a message.
  -- For each known label the classifier is asked for the statfiles carrying
  -- that label; they are added to the result when the message matches the
  -- corresponding rule:
  --   * 'many recipients'        - more than 5 recipients
  --   * 'undisclosed recipients' - recipient list present but empty
  --   * 'maillist'               - a List-Unsubscribe header exists
  --   * 'long subject'           - raw Subject longer than 150 bytes
  -- @param classifier object providing get_statfile_by_label(label)
  -- @param task object providing get_recipients(n) and get_header_raw(name)
  -- @return array of statfiles when more than one was collected, nil otherwise
  local function get_specific_statfiles(classifier, task)
    -- Compatibility shim: table.foreach does not exist in Lua 5.2+.
    -- This function no longer uses it, but other code in this file still
    -- calls table.foreach, so the shim stays installed here.
    if not table.foreach then
      table.foreach = function(t, f)
        for k, v in pairs(t) do f(k, v) end
      end
    end

    local spec_st = {}

    -- More than 5 recipients
    local st_many = classifier:get_statfile_by_label(many_recipients_label)
    if st_many then
      -- NOTE(review): the meaning of the argument 2 is defined by the task
      -- API, which is not visible here; it is passed through unchanged.
      local rcpt = task:get_recipients(2)
      if rcpt and #rcpt > 5 then
        append_statfiles(spec_st, st_many)
      end
    end

    -- Undisclosed recipients
    local st_undisc = classifier:get_statfile_by_label(undisclosed_recipients_label)
    if st_undisc then
      local rcpt = task:get_recipients(2)
      -- Only an existing but empty list matches; a nil result does not.
      if rcpt and #rcpt == 0 then
        append_statfiles(spec_st, st_undisc)
      end
    end

    -- Maillist
    local st_maillist = classifier:get_statfile_by_label(list_label)
    if st_maillist then
      if task:get_header_raw('List-Unsubscribe') then
        append_statfiles(spec_st, st_maillist)
      end
    end

    -- Long subject
    local st_longsubj = classifier:get_statfile_by_label(long_subject_label)
    if st_longsubj then
      local subj = task:get_header_raw('Subject')
      if subj and #subj > 150 then
        append_statfiles(spec_st, st_longsubj)
      end
    end

    -- A single statfile is deliberately not returned; the caller applies the
    -- same "more than one" threshold to its own selection.
    if #spec_st > 1 then
      return spec_st
    end
    return nil
  end
  --- Statfile selector registered under the 'bayes' key of `classifiers`.
  -- Selection order:
  --   1. Label-specific statfiles from get_specific_statfiles(). When
  --      learning, these are returned immediately if present.
  --   2. Unlabeled statfiles whose 'language' parameter equals the detected
  --      message language.
  --   3. Unlabeled statfiles that have no 'language' parameter.
  -- After steps 2 and 3 the accumulated list is returned as soon as it holds
  -- more than one statfile. Statfiles gathered in earlier steps stay in the
  -- list when falling through to the next step.
  -- NOTE(review): `classifiers` is expected to be a table supplied by the
  -- host environment; it is not defined in this file.
  -- @param classifier object providing get_statfiles() and
  --        get_statfile_by_label(label)
  -- @param task message object (get_text_parts(), get_recipients(),
  --        get_header_raw())
  -- @param is_learn truthy when called for learning
  -- @param is_spam not used by this selector
  -- @return array of selected statfiles, or nil when fewer than two qualify
  classifiers['bayes'] = function(classifier, task, is_learn, is_spam)
    -- Return the language of the first text part that reports one, or nil.
    local function detect_language(msg)
      local parts = msg:get_text_parts()
      if not parts then
        -- No text parts available: nothing to detect.
        return nil
      end
      for _, part in ipairs(parts) do
        local lang = part:get_language()
        if lang then
          return lang
        end
      end
      return nil
    end

    -- Main procedure
    local selected = {}
    local spec_st = get_specific_statfiles(classifier, task)
    if spec_st then
      if is_learn then
        return spec_st
      end
      -- Merge the label-specific statfiles into the selection.
      for _, st in ipairs(spec_st) do
        selected[#selected + 1] = st
      end
    end

    -- Detect statfiles by language
    local language = detect_language(task)
    if language then
      for _, st in ipairs(classifier:get_statfiles()) do
        -- Skip labeled statfiles
        if not st:get_label() then
          local st_l = st:get_param('language')
          if st_l and st_l == language then
            selected[#selected + 1] = st
          end
        end
      end
      if #selected > 1 then
        return selected
      end
    end

    -- Language not detected, or not enough statfiles for that language:
    -- add the unlabeled statfiles that declare no language at all.
    for _, st in ipairs(classifier:get_statfiles()) do
      if not st:get_label() then
        if not st:get_param('language') then
          selected[#selected + 1] = st
        end
      end
    end
    if #selected > 1 then
      return selected
    end
    return nil
  end