]> source.dussan.org Git - rspamd.git/commitdiff
* Rework structure of sample configs
authorVsevolod Stakhov <vsevolod@rambler-co.ru>
Wed, 24 Jun 2009 13:09:57 +0000 (17:09 +0400)
committerVsevolod Stakhov <vsevolod@rambler-co.ru>
Wed, 24 Jun 2009 13:09:57 +0000 (17:09 +0400)
* Fix rspamc
* Add english readme

CMakeLists.txt
README.en.txt [new file with mode: 0644]
conf/2tld.inc [new file with mode: 0644]
conf/drugs.inc [new file with mode: 0644]
conf/fraud.inc [new file with mode: 0644]
conf/headers.inc [new file with mode: 0644]
conf/html.inc [new file with mode: 0644]
conf/lotto.inc [new file with mode: 0644]
conf/surbl-whitelist.inc [new file with mode: 0644]
rspamc.pl.in
rspamd.conf.sample

index 67611982796b7f3603d0c3f5fb9bbba883bb6f2f..c13721a4a84f3901a4b67d9abac0571c36246b33 100644 (file)
@@ -393,6 +393,7 @@ ENDIF(ENABLE_PERL MATCHES "ON")
 
 CONFIGURE_FILE(config.h.in src/config.h)
 CONFIGURE_FILE(rspamc.pl.in rspamc.pl @ONLY)
+CONFIGURE_FILE(rspamd.conf.sample conf/rspamd.conf.sample @ONLY)
 
 ADD_EXECUTABLE(rspamd ${RSPAMDSRC} ${CONTRIBSRC} ${TOKENIZERSSRC} 
                                          ${CLASSIFIERSSRC} ${PLUGINSSRC} ${YACC_OUTPUT} 
@@ -454,8 +455,12 @@ ENDIF(LIBUTIL_LIBRARY)
 TARGET_LINK_LIBRARIES(utils/expression-parser ${GLIB2_LIBRARIES})
 TARGET_LINK_LIBRARIES(utils/expression-parser ${GMIME2_LIBRARIES})
 
+
 INSTALL(PROGRAMS rspamd-${RSPAMD_VERSION} DESTINATION bin RENAME rspamd)
 INSTALL(PROGRAMS rspamc.pl DESTINATION bin RENAME rspamc)
+# Create the config directory at install time under DESTDIR so staged installs stay inside the staging root.
+INSTALL(CODE "FILE(MAKE_DIRECTORY \$ENV{DESTDIR}\${CMAKE_INSTALL_PREFIX}/etc/rspamd)")
+INSTALL(DIRECTORY conf/ DESTINATION etc/rspamd)
 IF(ENABLE_PERL MATCHES "ON")
        INSTALL(CODE "EXECUTE_PROCESS(COMMAND make install WORKING_DIRECTORY perl)")
 ENDIF(ENABLE_PERL MATCHES "ON")
diff --git a/README.en.txt b/README.en.txt
new file mode 100644 (file)
index 0000000..2bfd33f
--- /dev/null
@@ -0,0 +1,170 @@
+API.
+===========
+
+API of rspamd is described in Doxygen documentation.
+
+Logic of operation of rspamd filters.
+==============================
+
+1) All filters are registered in the config file, in the description of filter chains:
+header_filters = "regexp, my_func"
+Here the filter name is either the name of a C module or the name of a script (Lua or Perl) function.
+Types of filters:
+* header_filters - the filters of headers
+* mime_filters - the filters for every mime part
+* message_filters - the filters of message without mime parsing
+* url_filters - filters of URLs in messages
+
+Filters register their results in metrics.
+
+2) A metric is a symbolic value in which filters register their results.
+There is a default metric named "default".
+For each metric there is a special consolidation function which calculates coefficients
+of results according to the internal logic of correspondence between symbols and coefficients.
+By default this function is a simple sum, which can be configured in the configuration file:
+
+# the Block factors
+factors {
+       # For example, "SURBL_DNS" =5.0
+       "SYMBOL_NAME" = coefficient;
+};
+
+It is also possible to register a special consolidation function for a metric:
+
+metric {
+       name = "test_metric";
+       function = "some_function";
+       required_score = 20.0;
+};
+
+
+The protocol.
+=========
+
+Answer format:
+SPAMD/1.1 0 EX_OK 
+      \/  \/   \/
+  Version Code Errors
+Spam: False; 2 / 5
+This format is compatible with sa-spamd (without metrics).
+
+New format of the answer:
+RSPAMD/1.0 0 EX_OK
+Metric: Name; Spam_Result; Spam_Mark / Spam_Mark_Required
+Metric: Name2; Spam_Result2; Spam_Mark2 / Spam_Mark_Required2
+
+There can be several headers of the Metric type.
+Format of symbol output:
+SYMBOL1, SYMBOL2, SYMBOL3 - sa-spamd compatible format
+Symbol: Name; Param1, Param2, Param3 - rspamd format
+
+The request format:
+PROCESS SPAMC/1.2
+\/      \/
+Command Version
+
+SPAMC - the protocol of compatibility with sa-spamd
+RSPAMC - new rspamd protocol
+In either operating mode the following headers are supported:
+Content-Length - Length of the message
+Helo - HELO, received from the client
+From - MAIL FROM
+IP - IP of the client
+Recipient-Number - Number of recipients
+Rcpt - the recipient
+Queue-ID - The queue identifier
+
+These values can be used in rspamd filters.
+
+Regular expressions
+====================
+
+Regular expressions are described in regexp module
+.module 'regexp' {
+       SYMBOL = "regexp_expression";
+};
+header_filters = "regexp";
+
+Format of regular expression:
+"/pattern/flags"
+Also for header lines there is special regexp line:
+headername =/pattern/flags
+
+Flags of regexp:
+i, m, s, x, u, o - same, as at perl/pcre
+r - raw not coded in utf8 regexp
+H - searches for a header
+M - searches in undecoded message
+P - searches in decoded mime parts
+U - searches in urls
+X - searches in undecoded headers
+
+Expression can contain regular expressions, functions, operators of logic and brackets:
+SOME_SYMBOL = "To =/blah@blah/H AND! (From =/blah@blah/H | Subject =/blah/H)"
+
+Also it is possible to use variables:
+$to_blah = "To =/blah@blah/H";
+$from_blah = "From =/blah@blah/H";
+$subject_blah = "Subject =/blah/H";
+
+Then the previous expression can be written as:
+
+SOME_SYMBOL = "${to_blah} AND! (${from_blah} | ${subject_blah})"
+
+Logic expressions rspamd
+===========================
+
+Expressions containing regular expressions, functions, logic operations, brackets, can be used
+for the filtering. General rules:
+- Logic operations can be boolean "And": ' & ', boolean "OR": ' | ' and boolean negation: '! '.
+- A priority of logic operations: &| -> !, for priority change it is possible to use brackets:
+ (A AND! B) |! (C|D)
+- Space symbols in expressions are ignored
+- An operand of the form /re/args or string=/re/args is considered a regular expression. In regular
+expressions all '/' and '"' symbols must be escaped with '\', but the '\' symbol itself does not need to be escaped.
+- An operand which accepts arguments is considered a function. Arguments of a function can be expressions, regexps or other functions.
+Arguments of a function are evaluated from left to right.
+- There is a number of built-in functions:
+  * header_exists - accepts header's name as argument, returns true if such heading exists
+  * compare_parts_distance - accepts as argument a number from 0 to 100 which reflects the difference in percent
+    between message parts. The function works with messages containing 2 text parts (text/plain and text/html) and
+    returns true when these parts differ by more than N percent. If the argument is not specified,
+    the function searches for completely different parts.
+  * compare_transfer_encoding - compares Content-Transfer-Encoding with the argument
+  * content_type_compare_param - compares Content-Type param with regular expression or line:
+     content_type_compare_param(Charset, /windows-\d+/)
+     content_type_compare_param(Charset, ascii)
+  * content_type_has_param - checks for specified Content-Type parameter
+  * content_type_is_subtype - compares a subtype of content-type to regular expression or line
+  * content_type_is_type - compares type of content-type to regular expression or line
+     content_type_is_type (text)
+     content_type_is_subtype (/?.html/)
+  * regexp_match_number - accepts a number as the first parameter, followed by a list of expressions.
+    If the number of matched expressions is more than the first argument, the function returns TRUE, for example:
+    regexp_match_number(2, ${__RE1}, ${__RE2}, header_exists(Subject))
+  * has_only_html_part - function returns TRUE if there is only HTML part in the message
+  * compare_recipients_distance - calculates percent of similar recipients of the message. Accepts argument - a threshold in 
+    percentage of similar recipients.
+  * is_recipients_sorted - returns TRUE if the list of recipients is sorted (works only if the number of recipients is >= 5).
+  * is_html_balanced - returns TRUE if tags in all html parts are balanced
+  * has_html_tag - returns TRUE if specified html tag is found
+
+The module chartable.
+================
+
+The module is intended for search of words with the mixed symbols, for example:
+kашa - partly in Latin and partly in Cyrillic.
+Module parameters:
+
+.module 'chartable' {
+       metric = "default";
+       symbol = "R_MIXED_CHARSET";
+       threshold = "0.1";
+};
+
+threshold is the ratio of transitions between character sets to the total number of symbols in words. For example, for the word
+"kаша" (the first letter is Latin) the total number of transitions is 3 and the number of transitions between character sets is 1, so
+the ratio is 1/3.
+
+To enable the module, add it to the mime_filters list:
+mime_filters = "chartable";
diff --git a/conf/2tld.inc b/conf/2tld.inc
new file mode 100644 (file)
index 0000000..299f962
--- /dev/null
@@ -0,0 +1,954 @@
+#
+# $Id: rspamd-2tld.conf,v 1.3 2009/06/17 15:01:17 dmx Exp $
+#
+infobox.ru
+free.bg
+mirohost.net
+007sites.com
+55fast.com
+5nxs.com
+freehost10.com
+hostaim.com
+iwebsource.com
+yourfreehosting.net
+freehostingz.com
+io.ua
+3eu.ru
+
+intway.info
+intwayblog.net
+ruprom.net
+
+lpchat.com
+szm.com
+gmxhome.de
+freewaywebhost.com
+sdsmt.edu
+ifrance.com
+100webspace.net
+jimdo.com
+gn8.net
+siteedit.su
+se-ua.net
+
+googlegroups.com
+
+digitalzones.com
+arcadepages.com
+builtfree.org
+angelcities.com
+freehostyou.com
+freesite.org
+freecities.com
+freewebpages.org
+ibnsites.com
+
+samomu.ru
+blog-nn.ru
+
+byethost24.com
+hostia.ru
+times.lv
+z8.ru
+1gb.ru
+url-site.com
+911mb.com
+tushino.com
+
+sp.ru
+
+omp9.com
+
+1freewebspace.com
+freewhost.com
+na.by
+gq.nu
+5u.com
+hy.cz
+8k.com
+
+nichost.ru
+
+qwerty.su
+qwerty.in
+
+10fast.net
+0fees.net
+netsolhost.com
+webs.com
+we.bs
+wz.cz
+go9.ru
+com.ua
+
+gorodok.net
+republika.pl
+interia.pl
+nazwa.pl
+
+infostore.org
+sapo.pt
+
+promzone.ru
+
+# Services for make URL's
+notlong.com
+
+# internet shops
+wowex.ru
+webasyst.net
+
+# ayola.net free hosting
+md6.ru
+ex6.ru
+w6.ru
+sk6.ru 
+md8.ru
+z16.ru
+cwx.ru
+
+# hostland.ru free hosting
+tu2.ru
+
+freetzi.com
+coolpage.biz
+
+# free a1free.net hosting
+a1free.net
+
+# free blog-o-hosting
+byethost2.com
+byethost3.com
+byethost4.com
+byethost5.com
+byethost6.com
+byethost7.com
+byethost8.com
+byethost9.com
+byethost10.com
+byethost11.com
+byethost12.com
+byethost13.com
+byethost14.com
+byethost15.com
+byethost16.com
+byethost17.com
+byethost18.com
+
+# free lunatic hosting
+aecru.org
+
+tw1.ru
+jino.ru
+ru.gg
+
+# http://www.de.gd/millired/home.html
+eu.ki
+cool.lc
+m.vu
+de.pl
+at.st
+ch.st
+seite.com
+mobi.ps
+biz.ps
+1x.net
+seite.info
+deutschland.nu
+bilder.net
+flirten.info
+seite.name
+seite.in
+seite.cz
+seite.es
+seite.pl
+seite.ru
+seite.im
+seite.li
+seite.lt
+seite.lv
+seite.sc
+seite.st
+seite.vc
+infos.lc
+deutschland.lc
+homepage.lc
+auto.lc
+musik.lc
+urlaub.lc
+musik.cx
+germany.cx
+homepage.cx
+infos.cx
+cool.hn
+shopping.hn
+tipp.cz
+gmbh.tw
+de.im
+top8.com
+game.lc
+start.lc
+xx.lc
+hp.lc
+on.lc
+portal.lc
+faq.lc
+page.lc
+home.lc
+domain.lc
+spiele.lc
+free.lc
+kostenlos.lc
+test.lc
+privat.lc
+dvd.lc
+pc.lc
+fotos.lc
+top.lc
+hot.lc
+gratis.lc
+forum.lc
+bilder.lc
+reise.lc
+liebe.lc
+24.lc
+vz.lc
+links.lc
+service.lc
+club.lc
+shops.lc
+da.cx
+eur.lc
+euro.lc
+europa.lc
+fehler.in
+bund.in
+hase.in
+teufel.in
+hexe.in
+mitglieder.in
+voten.in
+inserat.in
+smilie.in
+papst.in
+foren.in
+politik.in
+vertrieb.in
+anschauen.in
+finanzen.in
+redner.in
+esel.in
+hund.in
+sport.dj
+clandomain.de
+clandomain.org
+cl4n.org
+l4n.org
+cs-clan.org
+kr3w.de
+te4m.de
+thelan.info
+download.ac
+fun.gg
+download.sh
+download.je
+downloads.lc
+clan.lc
+clan.mn
+uk.nf
+web.gg
+k1.cx
+gb.nf
+us.nf
+usa.gg
+ru.nf
+blog.lc
+spam.lc
+about.lc
+user.lc
+xl.lc
+xxl.lc
+all.lc
+2.je
+4.je
+6.je
+7.je
+8.je
+9.je
+eu.gp
+de.gp
+deutsch.lc
+english.lc
+francais.lc
+espanol.lc
+italiano.lc
+portugues.lc
+dansk.lc
+nederlands.lc
+polski.lc
+Norsk.lc
+svenska.lc
+turkish.lc
+chinese.lc
+suomi.lc
+japanese.lc
+russian.lc
+greek.lc
+oesterreich.lc
+schweiz.lc
+nederland.lc
+zx9.de
+aw3.de
+com.nu
+eu.nu
+firm.nu
+qu.am
+co.gp
+movie.lc
+filme.lc
+int.nf
+int.ps
+jixx.de
+jixx.net
+guest.de
+guests.de
+ciy.de
+tv.gg
+eu.gg
+us.gg
+x.gg
+npx.de
+yj.ae
+flf.li
+2.ag
+fk.gs
+se.nf
+ca.nf
+au.nf
+be.nf
+dk.nf
+fi.nf
+gr.nf
+no.nf
+pl.nf
+cz.nf
+jp.nf
+ar.nf
+kr.nf
+br.nf
+cn.nf
+nl.gp
+es.gp
+asia.gp
+online.gp
+fr.gp
+it.gp
+search.gp
+mp3.gp
+b2b.lc
+wiki.lc
+flirt.lc
+18.lc
+3d.lc
+party.lc
+photos.lc
+you.lc
+space.lc
+share.lc
+today.lc
+de.ki
+in.nf
+world.mu
+now.lc
+welt.tl
+team.tl
+planet.tl
+netz.tl
+center.tl
+server.tl
+design.tl
+city.tl
+berlin.tl
+team.cx
+server.gg
+community.lc
+gaming.lc
+gilde.in
+chat.dj
+dr.ag
+name.vu
+media.je
+edu.ms
+name.vg
+pro.vg
+biz.uz
+pro.ac
+name.ac
+seite.asia
+eu.cr
+uk.cr
+ch.kg
+nl.kg
+gr.kg
+pt.kg
+lu.kg
+hu.kg
+online.tc
+info.nu
+de.cg
+de.gd
+9gb.de
+x9.eu
+uk.st
+us.st
+at.cr
+online.cr
+shopping.cr
+mp3.cr
+free.cr
+sms.cr
+1.vg
+top.tc
+mail.ht
+job.ec
+de.ht
+seite24.eu
+2.ly
+pro.ly
+internet.ly
+us.ly
+me.ly
+shop.fm
+
+#
+
+altervista.org
+iespana.es
+
+sk6.ru
+cantv.net
+lycos.es
+biz.ua
+wbs.cz
+ic.cz
+
+# 000webhost.com domains
+000webhost.com
+net23.net
+net84.net
+site88.net
+web44.net
+
+ya.com
+hopto.org
+free-webhosts.com
+byteact.com
+icr38.net
+angelfire.com
+mylivepage.ru
+valuehost.ru
+netfirms.com
+siteedit.ru
+
+# Other Zenon domains
+
+zmail.ru
+id.ru
+go.ru
+
+# Free wap hosting
+wen.ru
+
+# Relcom
+msk.su
+spb.su
+abkhazia.su
+adygeya.su
+arkhangelsk.su
+armenia.su
+ashgabad.su
+azerbaijan.su
+balashov.su
+bashkiria.su
+belgorod.su
+bryansk.su
+bukhara.su
+chimkent.su
+dagestan.su
+east-kazakhstan.su
+exnet.su
+georgia.su
+grozny.su
+ivanovo.su
+jambyl.su
+kalmykia.su
+kaluga.su
+karacol.su
+karaganda.su
+karelia.su
+khakassia.su
+komi.su
+krasnodar.su
+kurgan.su
+kustanai.su
+lenug.su
+mangyshlak.su
+mordovia.su
+murmansk.su
+nalchik.su
+navoi.su
+north-kazakhstan.su
+nov.su
+obninsk.su
+penza.su
+pokrovsk.su
+sochi.su
+tashkent.su
+termez.su
+togliatti.su
+troitsk.su
+tselinograd.su
+tula.su
+tuva.su
+vladikavkaz.su
+vladimir.su
+vologda.su
+yakutia.su
+
+europtrade.ru
+890m.com
+site40.net
+site50.net
+freezoka.com
+blogspot.com
+pvm62.com
+virtuale.org
+access.to
+topf.ru
+pisem.su
+ok.ru
+100mb.com
+co.cc
+
+# other pocha.ru domains
+krovatka.su
+
+# ucoz.ru
+3dn.ru
+at.ua
+clan.su
+do.am
+moy.su
+my1.ru
+p0.ru
+ucoz.com
+ucoz.de
+ucoz.es
+ucoz.hu
+ucoz.kz
+ucoz.lv
+ucoz.net
+ucoz.org
+ucoz.ua
+vo.uz
+
+# Create a ShortURL
+1sta.com
+vze.com
+
+# free hosters from:
+# http://svn.park.rambler.ru/svn/trunk/Spider/conf/freehosters.exclude
+
+spb.ru
+rbcmail.ru
+hut.ru
+narod.ru
+narod2.ru
+chat.ru
+by.ru
+far.ru
+h1.ru
+boom.ru
+nm.ru
+fbi.ru
+4all.ru
+bizmail.ru
+dnevnik.ru
+ecard.ru
+ganja.ru
+goa.ru
+hash.ru
+mpeg3.ru
+nrg.ru
+phreak.ru
+plazma.ru
+vi-rus.ru
+xak.ru
+newmail.ru
+z-photo.ru
+agava.ru
+dem.ru
+webservis.ru
+ur.ru
+bos.ru
+vov.ru
+r2.ru
+bip.ru 
+wallst.ru
+hotmail.ru
+hotbox.ru
+euro.ru
+dax.ru
+sitecity.ru
+al.ru
+dtn.ru
+hop.ru
+lgg.ru
+ru.ru
+pochtamt.ru
+stsland.ru
+promural.ru
+ufanet.ru
+kharkov.ua
+pisem.net
+fromru.com
+ukrbiz.net
+aiq.ru
+fatal.ru
+hoha.ru
+h11.ru
+h5.ru
+pochta.ru
+mail333.com
+krovatka.net
+orc.ru
+h12.ru
+front.ru
+land.ru
+com1.ru
+h14.ru
+8m.com
+sbn.bz
+xost.ru
+nxt.ru
+proxycheker.ru
+h15.ru
+hut1.ru
+webhost.ru
+polubomu.ru
+beplaced.ru
+h16.ru
+yard.ru
+smtp.ru
+pop3.ru
+hut2.ru
+subs.ru
+pud.ru
+hostonfly.ru
+jino-net.ru
+vio.ru
+vip.su
+i-nets.ru
+sitehome.ru
+freezona.ru
+pixi.ru
+ucoz.ru
+russian.ru
+mail2k.ru
+nextmail.ru
+programist.ru
+dezigner.ru
+email.su
+xaker.ru
+rubas.ru
+e2e.ru
+page.by
+x5x.ru
+h17.ru
+nightmail.ru
+h10.ru
+tu1.ru
+fxf.ru
+inlocal.ru
+hostq.ru
+greenline.ru
+21r.ru
+0805.ru
+aget.ru
+freepage.ru
+hoter.ru
+rx.ru
+wmsite.ru
+fxcity.ru
+ffx.ru
+clubfx.ru
+h18.ru
+vipshop.ru
+vipcentr.ru
+vdelo.ru
+viptop.ru
+tora.ru
+fromru.su
+iplot.ru
+pips.ru
+web-box.ru
+okis.ru
+hocomua.ru
+vdsite.ru
+enjjoy.ru
+metastock.ru
+onlymail.ru
+tut.su
+ifolder.ru
+wapn.ru
+hocom.by
+yadviga.ru
+x53.ru
+ilovethis.ru
+codingclub.ru
+onepage.ru
+onep.ru
+kzet.ru
+xam.su
+epage.ru
+student.su
+hu2.ru
+logmail.ru
+students.ru
+h2m.ru
+ho.ua
+mybyte.ru
+supercharts.ru
+forum24.ru
+quake.ru
+
+# free hosters from:
+# http://svn.park.rambler.ru/svn/trunk/Spider/conf/foreign_freehosters.exclude
+
+hotusa.org
+99inch.com
+yoll.net
+my-age.net
+enacre.net
+maclenet.com
+bebto.com
+undonet.com
+lydo.org
+imess.net
+pochta.org
+mail15.com
+fbhosting.com
+freeservers.com
+4t.com
+iwarp.com
+faithweb.com
+homestead.com
+itgo.com
+tvheaven.com
+netfreehost.com
+mindnmagick.com
+frageon.net
+dnip.net
+htmlplanet.com
+scriptmania.com
+100mbfreesite.com
+sfkteam.com
+glooby.net
+eclub.lv
+badland.com
+zomi.net
+power-emergency.com
+at.lv
+uadom.net
+ho11.com
+0golf.com
+netfast.org
+topcities.com
+100free.com
+1hwy.com
+lbgo.com
+izypage.net
+250free.com
+741.com
+noneto.com
+bestyour.org
+501megs.com
+atspace.com
+b4site.com
+gobot.com
+100freemb.com
+ueuo.com
+atspace.org
+8m.net
+freehosting.net
+greatnow.com
+1sweethost.com
+zzn.com
+i8.com
+servik.com
+s5.com
+freehostia.com
+sitesfree.com
+150m.com
+zoomshare.com
+freehostspace.com
+hostingisus.com
+3000mb.com
+0catch.com
+xphost.org
+onecoolhost.com
+php0h.net
+0pi.com
+cheeb.com
+20m.com
+00server.com
+sitesled.com
+00trek.com
+freewebspace.com
+4mg.com
+awardspace.com
+00author.com
+prohosting.com
+50webs.com
+275mb.com
+20to.com
+6te.net
+freewebpage.org
+indiegroup.com
+memebot.com
+5gbfree.com
+ourprofile.us
+110mb.com
+kwikphp.com
+bappy.com
+sphosting.com
+tekcities.com
+alfaspace.net
+vettepics.com
+atspace.us
+t35.com
+oceansfree.com
+30mb.com
+urllogs.com
+freehomepage.com
+ifastnet.com
+hostervice.info
+swiftphp.com
+atspace.name
+php0h.com
+freehostpro.com
+freehostonline.com
+awardspace.biz
+tecbox.com
+deep-ice.com
+myweb.io
+1111mb.com
+freerhost.com
+0moola.com
+50megs.com
+fws1.com
+4sql.net
+1majorhost.com
+gigcities.com
+0505mb.com
+freebitty.com
+prohosts.org
+gigsweb.com
+101freehost.com
+reghosting.com
+sprinterweb.net
+0000host.com
+2u-2.com
+tripod.com
+nofeehost.com
+web.com
+kogaryu.com
+itrello.com
+bravehost.com
+sinfree.net
+20ii.com
+on-4.com
+quotaless.com
+isgreat.org
+worldbreak.com
+20is.com
+00it.com
+fx-club.org
+9cy.com
+psend.com
+atspace.biz
+totalh.com
+orgfree.com
+php1h.com
+1asphost.com
+789mb.com
+yeahost.com
+3-hosting.net
+byethost.com
+wtcsites.com
+myfreewebhost.org
+20fr.com
+jvl.com
+brinkster.net
+freehosting300.com
+usafreespace.com
+freewebhostingpro.com
+justfree.com
+sppages.com
+70mb.ru
+joolo.com
+free-site-host.com
+eqo.de
+fora.pl
+envy.nu
+247ihost.com
+00bp.com
+iifree.net
+fr33webhost.com
+invbridge.com
+servetown.com
+fcpages.com
+dex1.com
+007webpro.com
+22web.net
+125mb.com
+12gbfree.com
+freeweb7.com
+hophost.net
+lookingat.us
+0buckhost.com
+batcave.net
+forever.kz
+web1000.com
+hit.bg
+fatfreehost.com
+quickfreehosting.com
+zxq.net
+zzl.org
+zymichost.com
+webng.com
+nutzworld.net
+freehostplace.com
+hostwq.net
+dreamstation.com
+012webpages.com
+host-page.com
+mokoginta.net
+cileni.com
+ownspace.org
+movillink.com
+mundesweb.com
+vhost4free.com
+700up.com
+siteburg.com
+9skul.com
+datadiri.com
+freehostpage.com
+hostinggratisvenezuela.com
+free-web-hosting.biz
+terapad.com
+10001mb.com
+blogindo.net
+o-f.com
+topfreewebhosting.com
+freeadsensehost.com
diff --git a/conf/drugs.inc b/conf/drugs.inc
new file mode 100644 (file)
index 0000000..444bc85
--- /dev/null
@@ -0,0 +1,68 @@
+# Rspamd variables for drugs emails
+
+$__DRUGS_DIET1="/(?:\b|\s)[_\W]{0,3}p[_\W]{0,3}h[_\W]{0,3}[e3\xE8-\xEB][_\W]{0,3}n[_\W]{0,3}t[_\W]{0,3}[e3\xE8-\xEB][_\W]{0,3}r[_\W]{0,3}m[_\W]{0,3}[i1!|l\xEC-\xEF][_\W]{0,3}n[_\W]{0,3}[e3\xE8-\xEB][_\W]{0,3}(?:\b|\s)/irP";
+$__DRUGS_DIET2="/(?:\b|\s)_{0,3}[i1!|l\xEC-\xEF][_\W]?o[_\W]?n[_\W]?[a4\xE0-\xE6@][_\W]?m[_\W]?[i1!|l\xEC-\xEF][_\W]?n_{0,3}\b/irP";
+$__DRUGS_DIET3="/\bbontril\b/irP";
+$__DRUGS_DIET4="/\bphendimetrazine\b/irP";
+$__DRUGS_DIET5="/\bdiethylpropion\b/irP";
+$__DRUGS_DIET6="/(?:\b|\s)[_\W]{0,3}M[_\W]{0,3}[e3\xE8-\xEB][_\W]{0,3}r[_\W]{0,3}[i1!|l\xEC-\xEF][_\W]{0,3}d[_\W]{0,3}[i1!|l\xEC-\xEF][_\W]{0,3}[a4\xE0-\xE6@][_\W]{0,3}(?:\b|\s)/irP";
+$__DRUGS_DIET7="/\b_{0,3}t[_\W]?[e3\xE8-\xEB][_\W]?n[_\W]?u[_\W]?a[_\W]?t[_\W]?[e3\xE8-\xEB]_{0,3}(?:\b|\s)/irP";
+$__DRUGS_DIET8="/\b_{0,3}d[_\W]?[i1!|l\xEC-\xEF][_\W]?d[_\W]?r[_\W]?[e3\xE8-\xEB][_\W]?xx?_{0,3}\b/irP";
+$__DRUGS_DIET9="/\b_{0,3}a[_\W]?d[_\W]?[i1!|l\xEC-\xEF][_\W]?p[_\W]?[e3\xE8-\xEB][_\W]?x_{0,3}\b/irP";
+$__DRUGS_DIET10="/\b_{0,3}x?x[_\W]?[e3\xE8-\xEB][_\W]?n[_\W]?[i1!|l\xEC-\xEF][_\W]?c[_\W]?[a4\xE0-\xE6@][_\W]?l_{0,3}\b/irP";
+$DRUGS_DIET="(${R_UNDISC_RCPT} | ${R_BAD_CTE_7BIT} | ${R_NO_SPACE_IN_FROM}) & (${__DRUGS_DIET1} | ${__DRUGS_DIET2} | ${__DRUGS_DIET3} | ${__DRUGS_DIET4} | ${__DRUGS_DIET5} | ${__DRUGS_DIET6} | ${__DRUGS_DIET7} | ${__DRUGS_DIET8} | ${__DRUGS_DIET9} | ${__DRUGS_DIET10})";
+
+$__DRUGS_ERECTILE1="/(?:\b|\s)[_\W]{0,3}(?:\\\/|V)[_\W]{0,3}[ij1!|l\xEC\xED\xEE\xEF][_\W]{0,3}[a40\xE0-\xE6@][_\W]{0,3}[xyz]?[gj][_\W]{0,3}r[_\W]{0,3}[a40\xE0-\xE6@][_\W]{0,3}x?[_\W]{0,3}(?:\b|\s)/irP";
+$__DRUGS_ERECTILE2="/\bV(?:agira|igara|iaggra|iaegra)\b/irP";
+$__DRUGS_ERECTILE3="/(?:\A|[\s\x00-\x2f\x3a-\x40\x5b-\x60\x7b-\x7f])[_\W]{0,3}C[_\W]{0,3}[ij1!|l\xEC\xED\xEE\xEF][_\W]{0,3}[a4\xE0-\xE6@][_\W]{0,3}l?[l!|1][_\W]{0,3}[i1!|l\xEC-\xEF][_\W]{0,3}s[_\W]{0,3}(?:\b|\s)/irP";
+$__DRUGS_ERECTILE4="/\bC(?:alis|ilias|ilais)\b/irP";
+$__DRUGS_ERECTILE5="/\b_{0,3}s[_\W]?[i1!|l\xEC-\xEF][_\W]?l[_\W]?d[_\W]?[e3\xE8-\xEB][_\W]?n[_\W]?[a4\xE0-\xE6@][_\W]?f[_\W]?[i1!|l\xEC-\xEF][_\W]?l c[_\W]?[i1!|l\xEC-\xEF][_\W]?t[_\W]?r[_\W]?[a4\xE0-\xE6@][_\W]?t[_\W]?[e3\xE8-\xEB]_{0,3}(?:\b|\s)/irP";
+$__DRUGS_ERECTILE6="/\b_{0,3}L[_\W]?[e3\xE8-\xEB][_\W]?(?:\\\/|V)[_\W]?[i1!|l\xEC-\xEF][_\W]?t[_\W]?r[_\W]?[a4\xE0-\xE6@][_\W]?(?:\b|\s)/irP";
+$__DRUGS_ERECTILE8="/\b_{0,3}T[_\W]?[a4\xE0-\xE6@][_\W]?d[_\W]?[a4\xE0-\xE6@][_\W]?l[_\W]?[a4\xE0-\xE6@][_\W]?f[_\W]?[i1!|l\xEC-\xEF][_\W]?l_{0,3}\b/irP";
+$__DRUGS_ERECTILE10="/\b_{0,3}V[_\W]?(?:i|\&iuml\;)[_\W]?(?:a|\&agrave|\&aring)\;?[_\W]?g[_\W]?r[_\W]?(?:a|\&agrave|\&aring)\b/irP";
+$__DRUGS_ERECTILE11="/(?:\b|\s)_{0,3}[a4\xE0-\xE6@][_\W]{0,3}p[_\W]{0,3}c[_\W]{0,3}[a4\xE0-\xE6@][_\W]{0,3}[l!|1][_\W]{0,3}[i1!|l\xEC-\xEF][_\W]{0,3}s_{0,3}\b/irP";
+$DRUGS_ERECTILE="(${R_UNDISC_RCPT} | ${R_BAD_CTE_7BIT} | ${R_NO_SPACE_IN_FROM}) & (${__DRUGS_ERECTILE1} | ${__DRUGS_ERECTILE2} | ${__DRUGS_ERECTILE3} | ${__DRUGS_ERECTILE4} | ${__DRUGS_ERECTILE5} | ${__DRUGS_ERECTILE6} | ${__DRUGS_ERECTILE8} | ${__DRUGS_ERECTILE10} | ${__DRUGS_ERECTILE11})";
+
+$__DRUGS_ANXIETY1="/(?:\b|\s)[_\W]{0,3}x?x[_\W]{0,3}[a4\xE0-\xE6@][_\W]{0,3}n[_\W]{0,3}[ea4\xE1\xE2\xE3@][_\W]{0,3}xx?_{0,3}\b/irP";
+$__DRUGS_ANXIETY2="/\bAlprazolam\b/irP";
+$__DRUGS_ANXIETY3="/(?:\b|\s)[_\W]{0,3}(?:\\\/|V)[_\W]{0,3}[a4\xE0-\xE6@][_\W]{0,3}[l|][_\W]{0,3}[i1!|l\xEC-\xEF][_\W]{0,3}[u\xB5\xF9-\xFC][_\W]{0,3}m\b/irP";
+$__DRUGS_ANXIETY4="/\b_{0,3}D[_\W]?[i1!|l\xEC-\xEF][_\W]?[a4\xE0-\xE6@][_\W]?z[_\W]?[ea3\xE9\xEA\xEB][_\W]?p[_\W]?[a4\xE0-\xE6@][_\W]?m_{0,3}\b/irP";
+$__DRUGS_ANXIETY5="/(?:\b|\s)[a4\xE0-\xE6@][_\W]?t[_\W]?[i1!|l\xEC-\xEF][_\W]?v[_\W]?[a4\xE0-\xE6@][_\W]?n_{0,3}\b/irP";
+$__DRUGS_ANXIETY6="/\b_{0,3}l[_\W]?[o0\xF2-\xF6][_\W]?r[_\W]?[a4\xE0-\xE6@][_\W]?z[_\W]?[e3\xE8-\xEB][_\W]?p[_\W]?[a4\xE0-\xE6@][_\W]?m_{0,3}\b/irP";
+$__DRUGS_ANXIETY7="/\b_{0,3}c[_\W]?l[_\W]?[o0\xF2-\xF6][_\W]?n[_\W]?[a4\xE0-\xE6@][_\W]?z[_\W]?e[_\W]?p[_\W]?[a4\xE0-\xE6@][_\W]?m\b/irP";
+$__DRUGS_ANXIETY8="/\bklonopin\b/irP";
+$__DRUGS_ANXIETY9="/\brivotril\b/irP";
+$DRUGS_ANXIETY="(${R_UNDISC_RCPT} | ${R_BAD_CTE_7BIT} | ${R_NO_SPACE_IN_FROM}) & (${__DRUGS_ANXIETY1} | ${__DRUGS_ANXIETY2} | ${__DRUGS_ANXIETY3} | ${__DRUGS_ANXIETY4} | ${__DRUGS_ANXIETY5} | ${__DRUGS_ANXIETY6} | ${__DRUGS_ANXIETY7} | ${__DRUGS_ANXIETY8} | ${__DRUGS_ANXIETY9})";
+$DRUGS_ANXIETY_EREC="${DRUGS_ERECTILE} & ${DRUGS_ANXIETY}";
+
+$__DRUGS_PAIN1="/\b_{0,3}h[_\W]?y[_\W]?d[_\W]?r[_\W]?[o0\xF2-\xF6][_\W]?c[_\W]?[o0\xF2-\xF6][_\W]?d[_\W]?[o0\xF2-\xF6][_\W]?n[_\W]?e_{0,3}\b/irP";
+$__DRUGS_PAIN2="/\b_{0,3}c[o0\xF2-\xF6]deine_{0,3}\b/irP";
+$__DRUGS_PAIN3="/(?:\b|\s)[_\W]{0,3}[u\xB5\xF9-\xFC][_\W]{0,3}l[_\W]{0,3}t[_\W]{0,3}r[_\W]{0,3}[a4\xE0-\xE6@][_\W]{0,3}m_{0,3}\b/irP";
+$__DRUGS_PAIN4="/(?:\b|\s)[_\W]{0,3}(?:\\\/|V)[_\W]{0,3}[i1!|l\xEC-\xEF][_\W]{0,3}c[_\W]{0,3}[o0\xF2-\xF6][_\W]{0,3}d[_\W]{0,3}[i1!|l\xEC-\xEF][_\W]{0,3}ns?[_\W]{0,3}(?:\b|\s)/irP";
+$__DRUGS_PAIN5="/\b_{0,3}t[_\W]?r[_\W]?[a4\xE0-\xE6@][_\W]?m[_\W]?[a4\xE0-\xE6@][_\W]?d[_\W]?[o0\xF2-\xF6][_\W]?[l!|1]_{0,3}\b/irP";
+$__DRUGS_PAIN6="/\b_{0,3}u[_\W]?l[_\W]?t[_\W]?r[_\W]?a[_\W]?c[_\W]?e[_\W]?t_{0,3}\b/irP";
+$__DRUGS_PAIN7="/\b_{0,3}f[_\W]?[i1!|l\xEC-\xEF][_\W]?[o0\xF2-\xF6][_\W]?r[_\W]?[i1!|l\xEC-\xEF][_\W]?c[_\W]?[e3\xE8-\xEB][_\W]?[t7]_{0,3}\b/irP";
+$__DRUGS_PAIN8="/\b_{0,3}c[_\W]?[e3\xE8-\xEB][_\W]?l[_\W]?[e3\xE8-\xEB][_\W]?b[_\W]?r[_\W]?[e3\xE8-\xEB][_\W]?x_{0,3}\b/irP";
+$__DRUGS_PAIN9="/(?:\b|\s)_{0,3}[i1!|l\xEC-\xEF]m[i1!|l\xEC-\xEF]tr[e3\xE8-\xEB]x_{0,3}\b/irP";
+$__DRUGS_PAIN10="/(?:\b|\s)[_\W]{0,3}(?:\\\/|V)[_\W]{0,3}[i1!|l\xEC-\xEF][_\W]{0,3}[o0\xF2-\xF6][_\W]{0,3}x[_\W]{0,3}xx?_{0,3}\b/irP";
+$__DRUGS_PAIN11="/\bzebutal\b/irP";
+$__DRUGS_PAIN12="/\besgic plus\b/irP";
+$__DRUGS_PAIN13="/\bD[_\W]?[a4\xE0-\xE6@][_\W]?r[_\W]?v[_\W]?[o0\xF2-\xF6][_\W]?n\b/irP";
+$__DRUGS_PAIN14="/N[o0\xF2-\xF6]rc[o0\xF2-\xF6]/irP";
+$__DRUGS_PAIN="(${R_UNDISC_RCPT} | ${R_BAD_CTE_7BIT} | ${R_NO_SPACE_IN_FROM}) & (${__DRUGS_PAIN1} | ${__DRUGS_PAIN2} | ${__DRUGS_PAIN3} | ${__DRUGS_PAIN4} | ${__DRUGS_PAIN5} | ${__DRUGS_PAIN6} | ${__DRUGS_PAIN7} | ${__DRUGS_PAIN8} | ${__DRUGS_PAIN9} | ${__DRUGS_PAIN10} | ${__DRUGS_PAIN11} | ${__DRUGS_PAIN12} | ${__DRUGS_PAIN13} | ${__DRUGS_PAIN14})";
+
+$__DRUGS_SLEEP1="/(?:\b|\s)[_\W]{0,3}[a4\xE0-\xE6@][_\W]{0,3}m[_\W]{0,3}b[_\W]{0,3}[i1!|l\xEC-\xEF][_\W]{0,3}[e3\xE8-\xEB][_\W]{0,3}n[_\W]{0,3}(?:\b|\s)/irP";
+$__DRUGS_SLEEP2="/(?:\b|\s)[_\W]{0,3}S[_\W]{0,3}[o0\xF2-\xF6][_\W]{0,3}n[_\W]{0,3}[a4\xE0-\xE6@][_\W]{0,3}t[_\W]{0,3}[a4\xE0-\xE6@][_\W]{0,3}(?:\b|\s)/irP";
+$__DRUGS_SLEEP3="/\b_{0,3}R[_\W]?[e3\xE8-\xEB][_\W]?s[_\W]?t[_\W]?[o0\xF2-\xF6][_\W]?r[_\W]?i[_\W]?l_{0,3}\b/irP";
+$__DRUGS_SLEEP4="/\b_{0,3}H[_\W]?[a4\xE0-\xE6@][_\W]?l[_\W]?c[_\W]?i[_\W]?[o0\xF2-\xF6][_\W]?n_{0,3}\b/irP";
+$__DRUGS_SLEEP="${__DRUGS_SLEEP1} | ${__DRUGS_SLEEP2} | ${__DRUGS_SLEEP3} | ${__DRUGS_SLEEP4}";
+
+$__DRUGS_MUSCLE1="/(?:\b|\s)[_\W]{0,3}s[_\W]{0,3}[o0\xF2-\xF6][_\W]{0,3}m[_\W]{0,3}[a4\xE0-\xE6@][_\W]{0,3}(?:\b|\s)/irP";
+$__DRUGS_MUSCLE2="/\b_{0,3}cycl[o0\xF2-\xF6]b[e3\xE8-\xEB]nz[a4\xE0-\xE6@]pr[i1!|l\xEC-\xEF]n[e3\xE8-\xEB]_{0,3}(?:\b|\s)/irP";
+$__DRUGS_MUSCLE3="/\b_{0,3}f[_\W]?l[_\W]?[e3\xE8-\xEB][_\W]?x[_\W]?[e3\xE8-\xEB][_\W]?r[_\W]?[i1!|l\xEC-\xEF]_{0,3}[_\W]?l_{0,3}\b/irP";
+$__DRUGS_MUSCLE4="/\b_{0,3}z[_\W]?a[_\W]?n[_\W]?a[_\W]?f[_\W]?l[_\W]?e[_\W]?x_{0,3}\b/irP";
+$__DRUGS_MUSCLE5="/\bskelaxin\b/irP";
+$DRUGS_MUSCLE="(${R_UNDISC_RCPT} | ${R_BAD_CTE_7BIT} | ${R_NO_SPACE_IN_FROM}) & (${__DRUGS_MUSCLE1} | ${__DRUGS_MUSCLE2} | ${__DRUGS_MUSCLE3} | ${__DRUGS_MUSCLE4} | ${__DRUGS_MUSCLE5})";
+
+$DRUGS_MANYKINDS="(${R_UNDISC_RCPT} | ${R_BAD_CTE_7BIT} | ${R_NO_SPACE_IN_FROM}) & regexp_match_number(3, ${DRUGS_ERECTILE}, ${DRUGS_DIET}, ${__DRUGS_PAIN}, ${__DRUGS_SLEEP}, ${DRUGS_MUSCLE}, ${DRUGS_ANXIETY})";
+
diff --git a/conf/fraud.inc b/conf/fraud.inc
new file mode 100644 (file)
index 0000000..2c52471
--- /dev/null
@@ -0,0 +1,56 @@
+# Advance fee spam
+
+$__FRAUD_DBI="/(?:\bdollars?\b|\busd(?:ollars)?(?:[0-9]|\b)|\bus\$|\$[0-9,.]{6,}|\$[0-9].{0,8}[mb]illion|\$[0-9.,]{2,10} ?m|\beuros?\b|u[.]?s[.]? [0-9.]+ m)/irP";
+$__FRAUD_KJV="/(?:claim|concerning) (?:the|this) money/irP";
+$__FRAUD_IRJ="/(?:finance|holding|securit(?:ies|y)) (?:company|firm|storage house)/irP";
+$__FRAUD_NEB="/(?:government|bank) of nigeria/irP";
+$__FRAUD_XJR="/(?:who was a|as a|an? honest|you being a|to any) foreigner/irP";
+$__FRAUD_DPR="/\b(?:(?:respond|reply) (?:urgently|immediately)|(?:urgent|immediate|earliest) (?:reply|response))\b/irP";
+$__FRAUD_PTS="/\b(?:ass?ass?inat(?:ed|ion)|murder(?:e?d)?|kill(?:ed|ing)\b[^.]{0,99}\b(?:war veterans|rebels?))\b/irP";
+$__FRAUD_BEP="/\b(?:bank of nigeria|central bank of|trust bank|apex bank|amalgamated bank)\b/irP";
+$__FRAUD_TDP="/\b(?:business partner(?:s|ship)?|silent partner(?:s|ship)?)\b/irP";
+$__FRAUD_GAN="/\b(?:charles taylor|serena|abacha|gu[eéè]i|sese[- ]?seko|kabila)\b/irP";
+$__FRAUD_IRT="/\b(?:compliments? of the|dear friend|dear sir|yours faithfully|season'?s greetings)\b/irP";
+$__FRAUD_AON="/\b(?:confidential|private|alternate|alternative) (?:(?:e-? *)?mail)\b/irP";
+$__FRAUD_WNY="/\b(?:disburse?(?:ment)?|incurr?(?:ed)?|remunerr?at(?:ed?|ion)|remm?itt?(?:ed|ance|ing)?)\b/irP";
+$__FRAUD_IPK="/\b(?:in|to|visit) your country\b/irP";
+$__FRAUD_QXX="/\b(?:my name is|i am) (?:mrs?|engr|barrister|dr|prince(?:ss)?)[. ]/irP";
+$__FRAUD_IOU="/\b(?:no risks?|risk-? *free|free of risks?|100% safe)\b/irP";
+$__FRAUD_EZY="/\b(?:of|the) late president\b/irP";
+$__FRAUD_MLY="/\b(?:reply|respond)\b[^.]{0,50}\b(?:to|through)\b[^.]{0,50}\@\b/irP";
+$__FRAUD_ZFJ="/\b(?:wife|son|brother|daughter) of the late\b/irP";
+$__FRAUD_KDT="/\bU\.?S\.?(?:D\.?)?\s*(?:\$\s*)?(?:\d+,\d+,\d+|\d+\.\d+\.\d+|\d+(?:\.\d+)?\s*milli?on)/irP";
+$__FRAUD_ULK="/\baffidavits?\b/irP";
+$__FRAUD_BGP="/\battached to ticket number\b/irP";
+$__FRAUD_FBI="/\bdisburs/irP";
+$__FRAUD_JBU="/\bforeign account\b/irP";
+$__FRAUD_YWW="/\bfurnish you with\b/irP";
+$__FRAUD_JYG="/\bgive\s+you .{0,15}(?:fund|money|total|sum|contact|percent)\b/irP";
+$__FRAUD_XVW="/\bhonest cooperation\b/irP";
+$__FRAUD_UUY="/\blegitimate business(?:es)?\b/irP";
+$__FRAUD_SNT="/\blocate(?: .{1,20})? extended relative/irP";
+$__FRAUD_LTX="/\bmilli?on (?:.{1,25} thousand\s*)?(?:(?:united states|u\.?s\.?) dollars|(?i:U\.?S\.?D?))\b/irP";
+$__FRAUD_JNB="/\boperat(?:e|ing)\b[^.]{0,99}\b(?:for(?:ei|ie)gn|off-? ?shore|over-? ?seas?) (?:bank )?accounts?\b/irP";
+$__FRAUD_QFY="/\bover-? *(?:invoiced?|cost(?:s|ing)?)\b/irP";
+$__FRAUD_WDR="/\bprivate lawyer\b/irP";
+$__FRAUD_WFC="/\bsecur(?:e|ing) (?:the )?(?:funds?|monies)\b/irP";
+$__FRAUD_AUM="/\bthe desk of\b/irP";
+$__FRAUD_MCQ="/\btransaction\b.{1,30}\b(?:magnitude|diplomatic|strict|absolute|secret|confiden(?:tial|ce)|guarantee)/irP";
+$__FRAUD_ETX="/\byour\b[^.]{0,99}\b(?:contact (?:details|information)|private (?:e?[- ]?mail|telephone|tel|phone|fax))\b/irP";
+$__FRAUD_PVN="/as the beneficiary/irP";
+$__FRAUD_FVU="/award notification/irP";
+$__FRAUD_CKF="/computer ballot system/irP";
+$__FRAUD_FCW="/fiduciary agent/irP";
+$__FRAUD_MQO="/foreign (?:business partner|customer)/irP";
+$__FRAUD_TCC="/foreign (?:offshore )?(?:bank|account)/irP";
+$__FRAUD_GBW="/god gives .{1,10}second chance/irP";
+$__FRAUD_NRG="/i am contacting you/irP";
+$__FRAUD_RLX="/lott(?:o|ery) (?:co,?ordinator|international)/irP";
+$__FRAUD_AXF="/magnanimity/irP";
+$__FRAUD_THJ="/modalit(?:y|ies)/irP";
+$__FRAUD_YQV="/nigerian? (?:national|government)/irP";
+$__FRAUD_YJA="/over-invoice/irP";
+$__FRAUD_YPO="/the total sum/irP";
+$__FRAUD_UOQ="/vital documents/irP";
+$ADVANCE_FEE_2="(${R_UNDISC_RCPT} | ${R_BAD_CTE_7BIT} | ${R_NO_SPACE_IN_FROM}) & regexp_match_number(2, ${__FRAUD_KJV}, ${__FRAUD_IRJ}, ${__FRAUD_NEB}, ${__FRAUD_XJR}, ${__FRAUD_EZY}, ${__FRAUD_ZFJ}, ${__FRAUD_KDT}, ${__FRAUD_BGP}, ${__FRAUD_FBI}, ${__FRAUD_JBU}, ${__FRAUD_JYG}, ${__FRAUD_XVW}, ${__FRAUD_SNT}, ${__FRAUD_LTX}, ${__FRAUD_MCQ}, ${__FRAUD_PVN}, ${__FRAUD_FVU}, ${__FRAUD_CKF}, ${__FRAUD_FCW}, ${__FRAUD_MQO}, ${__FRAUD_TCC}, ${__FRAUD_GBW}, ${__FRAUD_NRG}, ${__FRAUD_RLX}, ${__FRAUD_AXF}, ${__FRAUD_THJ}, ${__FRAUD_YQV}, ${__FRAUD_YJA}, ${__FRAUD_YPO}, ${__FRAUD_UOQ}, ${__FRAUD_DBI}, ${__FRAUD_BEP}, ${__FRAUD_DPR}, ${__FRAUD_QXX}, ${__FRAUD_QFY}, ${__FRAUD_PTS}, ${__FRAUD_TDP}, ${__FRAUD_GAN}, ${__FRAUD_IPK}, ${__FRAUD_AON}, ${__FRAUD_WNY}, ${__FRAUD_AUM}, ${__FRAUD_WFC}, ${__FRAUD_YWW}, ${__FRAUD_ULK}, ${__FRAUD_IOU}, ${__FRAUD_JNB}, ${__FRAUD_IRT}, ${__FRAUD_ETX}, ${__FRAUD_WDR}, ${__FRAUD_UUY}, ${__FRAUD_MLY})";
+$ADVANCE_FEE_3="(${R_UNDISC_RCPT} | ${R_BAD_CTE_7BIT} | ${R_NO_SPACE_IN_FROM}) & regexp_match_number(3, ${__FRAUD_KJV}, ${__FRAUD_IRJ}, ${__FRAUD_NEB}, ${__FRAUD_XJR}, ${__FRAUD_EZY}, ${__FRAUD_ZFJ}, ${__FRAUD_KDT}, ${__FRAUD_BGP}, ${__FRAUD_FBI}, ${__FRAUD_JBU}, ${__FRAUD_JYG}, ${__FRAUD_XVW}, ${__FRAUD_SNT}, ${__FRAUD_LTX}, ${__FRAUD_MCQ}, ${__FRAUD_PVN}, ${__FRAUD_FVU}, ${__FRAUD_CKF}, ${__FRAUD_FCW}, ${__FRAUD_MQO}, ${__FRAUD_TCC}, ${__FRAUD_GBW}, ${__FRAUD_NRG}, ${__FRAUD_RLX}, ${__FRAUD_AXF}, ${__FRAUD_THJ}, ${__FRAUD_YQV}, ${__FRAUD_YJA}, ${__FRAUD_YPO}, ${__FRAUD_UOQ}, ${__FRAUD_DBI}, ${__FRAUD_BEP}, ${__FRAUD_DPR}, ${__FRAUD_QXX}, ${__FRAUD_QFY}, ${__FRAUD_PTS}, ${__FRAUD_TDP}, ${__FRAUD_GAN}, ${__FRAUD_IPK}, ${__FRAUD_AON}, ${__FRAUD_WNY}, ${__FRAUD_AUM}, ${__FRAUD_WFC}, ${__FRAUD_YWW}, ${__FRAUD_ULK}, ${__FRAUD_IOU}, ${__FRAUD_JNB}, ${__FRAUD_IRT}, ${__FRAUD_ETX}, ${__FRAUD_WDR}, ${__FRAUD_UUY}, ${__FRAUD_MLY})";
diff --git a/conf/headers.inc b/conf/headers.inc
new file mode 100644 (file)
index 0000000..29f06b3
--- /dev/null
@@ -0,0 +1,167 @@
+# Various header violations
+
+# Subject needs encoding
+$__SUBJECT_ENCODED_B64 = "Subject=/=\?\S+\?B\?/iX";
+$__SUBJECT_ENCODED_QP="Subject=/=\?\S+\?Q\?/iX";
+$__SUBJECT_NEEDS_MIME="Subject=/[\x00-\x08\x0b\x0c\x0e-\x1f\x7f-\xff]/X";
+$SUBJECT_NEEDS_ENCODING = "!${__SUBJECT_ENCODED_B64} & !${__SUBJECT_ENCODED_QP} & ${__SUBJECT_NEEDS_MIME}";
+$__HAS_SUBJECT="header_exists(Subject)";
+$__EMPTY_SUBJECT="Subject=/^$/";
+$MISSING_SUBJECT="!${__HAS_SUBJECT} | ${__EMPTY_SUBJECT}";
+$__R_RCVD_POCHTA_RU="Received=/by mail\d\.ks\.pochta\.ru \( sendmail 8\.\d{2}\.\d\/8\.\d{2}\.\d\) with esmtpa id/H";
+$__R_MUA_OUTLOOK="X-Mailer=/^Microsoft Outlook Express/Hr";
+$__R_MUA_THEBAT="X-Mailer=/^The Bat!/H";
+$__R_CTYPE_TEXT="content_type_is_type(text)";
+$__R_CTE_7BIT="compare_transfer_encoding(7bit)";
+$__R_BODY_8BIT="/[^\x01-\x7f]/Mr";
+$R_BAD_CTE_7BIT="${__R_CTYPE_TEXT} & ${__R_CTE_7BIT} & ${__R_BODY_8BIT}";
+$R_TLD_TK = "/\.tk$/U";
+$R_POCHTA_RU = "${__R_RCVD_POCHTA_RU} & ${R_TLD_TK} & ${SUBJECT_NEEDS_ENCODING}";
+$R_TMP_SPAMMY_MAILER = "X-Mailer=/^(?:Exim 3\.12|Gentoo|Qmail 2\.67|Sendmail 3\.84\/3\.84|WebPOP 1\.0|mLogic)/H";
+$R_WWW_EKONF_COM = "${__R_MEGA_TABLE} & ${__R_GREEK_SYMBOLS}";
+$R_FREE_HOSTING_NAROD = "/\.narod\.ru/U";
+$R_TINYURL = "/http:\/\/(?:tinyurl\.com|snipr\.com|b23\.ru)\/\w/U";
+$R_FREE_HOSTING = "/\.(?:fromru\.com|front\.ru|hotbox\.ru|hotmail\.ru|krovatka\.su|land\.ru|mail15\.com|mail333\.com|newmail\.ru|nightmail\.ru|nm\.ru|pisem\.net|pochtamt\.ru|pop3\.ru|rbcmail\.ru|smtp\.ru)/U";
+
+$__HAS_TO="header_exists(To)";
+$MISSING_TO="!${__HAS_TO}";
+$__UNDISC_RCPT="To=/^<?undisclosed-recipient/Hi";
+$R_UNDISC_RCPT="${MISSING_TO} | ${__UNDISC_RCPT}";
+
+$__HAS_MID="header_exists(Message-Id)";
+$MISSING_MID="!${__HAS_MID}";
+$R_RCVD_SPAMBOTS="Received=/^from \[\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\] by [-.\w+]{5,255}; [SMTWF][a-z][a-z], [\s\d]?\d [JFMAJSOND][a-z][a-z] \d{4} \d{2}:\d{2}:\d{2} [-+]\d{4}$/mH";
+$R_TO_SEEMS_AUTO="To=/\"?(?<bt>[-.\w]{1,64})\"?\s<\k<bt>\@/H";
+$R_MISSING_CHARSET="content_type_is_type(text) & !content_type_has_param(charset)";
+$R_SAJDING="Subject=/\bsajding(?:om|a)?\b/iH";
+$__R_MUA_MPOP_WEBMAIL="X-Mailer=/^mPOP Web-Mail \d\.\d{2}$/H";
+$__R_MID_MAILRU="Message-Id=/\@\w+\.mail\.ru>$/H";
+$__R_RCVD_FROM_MAILRU="Received=/ by [a-z\.]+\d*\.mail\.ru with /H";
+$__R_X_RCVD_FROM_MAILRU="X-Received=/ by [a-z\.]+\d*\.mail\.ru with /H";
+$R_FORGED_MPOP_WEBMAIL="${__R_MUA_MPOP_WEBMAIL} & !(${__R_RCVD_FROM_MAILRU} | ${__R_X_RCVD_FROM_MAILRU} | ${__R_MID_MAILRU})";
+$__R_BGCOLOR="/BGCOLOR=/iM";
+$__R_FONT_COLOR="/font color=[\"']?\#FFFFFF[\"']?/iM";
+$R_WHITE_ON_WHITE="(!${__R_BGCOLOR} & ${__R_FONT_COLOR})";
+$R_NO_SPACE_IN_FROM="From=/\S<[-\w\.]+\@[-\w\.]+>/X";
+$R_FLASH_REDIR_IMGSHACK="/^(?:http:\/\/)?img\d{1,5}\.imageshack\.us\/\S+\.swf/U";
+$__R_RCVD_FROM_VALUEHOST="Received=/\sb0\.valuehost\.ru/H";
+$__R_CYR_PHONE="/8 \(\xD799\)/P";
+
+$R_SPAM_FROM_VALUEHOST="${__R_RCVD_FROM_VALUEHOST} & ${__R_CYR_PHONE}";
+$__HAS_USER_AGENT="header_exists(User-Agent)";
+$__HAS_X_MAILER="header_exists(X-Mailer)";
+
+$__R_RCVD_FROM_MTU="Received=/smtp\d*\.mtu\.ru/H";
+$__R_MID_MTU="Message-Id=/\@smtp\d*\.mtu\.ru>$/H";
+
+$__R_RCVD_FROM_ONO="Received=/smtp\d*\.ono\.com/H";
+$__R_MID_ONO="Message-Id=/\@ono\.com>$/H";
+
+$__R_RCVD_FROM_VERSATEL="Received=/mail\d*do\.versatel\.de/H";
+$__R_MID_VERSATEL="Message-Id=/\@versanet\.de>$/H";
+
+$__R_RCVD_FROM_LIBERO="Received=/cp-out\d+\.libero\.it/H";
+$__R_MID_LIBERO="Message-Id=/[\da-f]{12}\.[\da-f]{16}@/H";
+
+$R_SPAM_FROM_MTU="!(${__HAS_X_MAILER} | ${__HAS_USER_AGENT}) & ${__R_RCVD_FROM_MTU} & ${__R_MID_MTU}"; 
+$R_SPAM_FROM_ONO="!(${__HAS_X_MAILER} | ${__HAS_USER_AGENT}) & ${__R_RCVD_FROM_ONO} & ${__R_MID_ONO}";
+$R_SPAM_FROM_VERSATEL="!(${__HAS_X_MAILER} | ${__HAS_USER_AGENT}) & ${__R_RCVD_FROM_VERSATEL} & ${__R_MID_VERSATEL}";
+$R_SPAM_FROM_LIBERO="!(${__HAS_X_MAILER} | ${__HAS_USER_AGENT}) & ${__R_RCVD_FROM_LIBERO} & ${__R_MID_LIBERO}";
+#$R_FAKE_OUTLOOK="${__R_MUA_OUTLOOK}";
+# $R_FAKE_OUTLOOK="${__R_MUA_OUTLOOK} & (${SUBJECT_NEEDS_ENCODING} | ${R_BAD_CTE_7BIT})";
+$R_FAKE_OUTLOOK="${__R_MUA_OUTLOOK} & ${R_BAD_CTE_7BIT}";
+$R_FAKE_THEBAT="${__R_MUA_THEBAT} & ${SUBJECT_NEEDS_ENCODING}";
+
+$__YAHOO_BULK="Received=/from \[\S+\] by \S+\.(?:groups|scd|dcn)\.yahoo\.com with NNFMP/H";
+$__ANY_OUTLOOK_MUA="X-Mailer=/^Microsoft Outlook\b/H";
+$MIME_HTML_ONLY="has_only_html_part()";
+$FORGED_OUTLOOK_HTML="!${__YAHOO_BULK} & ${__ANY_OUTLOOK_MUA} & ${MIME_HTML_ONLY}";
+$SUSPICIOUS_RECIPS="compare_recipients_distance(0.65)";
+$SORTED_RECIPS="is_recipients_sorted()";
+$TRACKER_ID="/^[a-z0-9]{6,24}[-_a-z0-9]{2,36}[a-z0-9]{6,24}\s*\z/isPr";
+$__FROM_ENCODED_B64="From=/\=\?\S+\?B\?/iX";
+$__FROM_NEEDS_MIME="From=/[\x00-\x08\x0b\x0c\x0e-\x1f\x7f-\xff]/H";
+$FROM_EXCESS_BASE64="${__FROM_ENCODED_B64} & !${__FROM_NEEDS_MIME}";
+
+$__OE_MUA="X-Mailer=/\bOutlook Express [456]\./H";
+$__OE_MSGID_1="Message-Id=/^[A-Za-z0-9-]{7}[A-Za-z0-9]{20}\@hotmail\.com$/mH";
+$__OE_MSGID_2="Message-Id=/^(?:[0-9a-f]{8}|[0-9a-f]{12})\$[0-9a-f]{8}\$[0-9a-f]{8}\@\S+$/mH";
+$__LYRIS_EZLM_REMAILER="List-Unsubscribe=/<mailto:(?:leave-\S+|\S+-unsubscribe)\@\S+>$/H";
+#$__GATED_THROUGH_RCVD_REMOVER="gated_through_received_hdr_remover()";
+$__WACKY_SENDMAIL_VERSION="Received=/\/CWT\/DCE\)/H";
+$__IPLANET_MESSAGING_SERVER="Received=/iPlanet Messaging Server/H";
+$__HOTMAIL_BAYDAV_MSGID="Message-Id=/^BAY\d+-DAV\d+[A-Z0-9]{25}\@phx\.gbl$/mH";
+$__SYMPATICO_MSGID="Message-Id=/^BAYC\d+-PASMTP\d+[A-Z0-9]{25}\@CEZ\.ICE$/mH";
+# $__UNUSABLE_MSGID="${__LYRIS_EZLM_REMAILER} | ${__GATED_THROUGH_RCVD_REMOVER} | ${__WACKY_SENDMAIL_VERSION} | ${__IPLANET_MESSAGING_SERVER} | ${__HOTMAIL_BAYDAV_MSGID} | ${__SYMPATICO_MSGID}";
+$__UNUSABLE_MSGID="${__LYRIS_EZLM_REMAILER} | ${__WACKY_SENDMAIL_VERSION} | ${__IPLANET_MESSAGING_SERVER} | ${__HOTMAIL_BAYDAV_MSGID} | ${__SYMPATICO_MSGID}";
+$__FORGED_OE="${__OE_MUA} & !${__OE_MSGID_1} & !${__OE_MSGID_2} & !${__UNUSABLE_MSGID}";
+$__OUTLOOK_DOLLARS_MUA="X-Mailer=/^Microsoft Outlook(?: 8| CWS, Build 9|, Build 10)\./H";
+$__OUTLOOK_DOLLARS_OTHER="Message-Id=/^\!\~\!/mH";
+$__VISTA_MSGID="Message-Id=/^[A-F\d]{32}\@\S+$/mH";
+$__IMS_MSGID="Message-Id=/^[A-F\d]{36,40}\@\S+$/mH";
+$__FORGED_OUTLOOK_DOLLARS="${__OUTLOOK_DOLLARS_MUA} & !${__OE_MSGID_2} & !${__OUTLOOK_DOLLARS_OTHER} & !${__VISTA_MSGID} & !${__IMS_MSGID} & !${__UNUSABLE_MSGID}";
+$__FMO_EXCL_O3416="X-Mailer=/^Microsoft Outlook, Build 10\.0\.3416$/H";
+$__FMO_EXCL_OE3790="X-Mailer=/^Microsoft Outlook Express 6\.00\.3790\.3959$/H";
+$FORGED_MUA_OUTLOOK="(${__FORGED_OE} | ${__FORGED_OUTLOOK_DOLLARS}) & !${__FMO_EXCL_O3416} & !${__FMO_EXCL_OE3790} & !${__VISTA_MSGID}";
+
+$__SANE_MSGID="Message-Id=/^[^<>\\ \t\n\r\x0b\x80-\xff]+\@[^<>\\ \t\n\r\x0b\x80-\xff]+\s*$/mH";
+$__MSGID_COMMENT="Message-Id=/\(.*\)/mH";
+$INVALID_MSGID="${__HAS_MID} & !(${__SANE_MSGID} | ${__MSGID_COMMENT})";
+$HTML_MIME_NO_HTML_TAG="${MIME_HTML_ONLY} & !${__TAG_EXISTS_HTML}";
+$__CD="header_exists(Content-Disposition)";
+$__CTE="header_exists(Content-Transfer-Encoding)";
+$__CT="header_exists(Content-Type)";
+$__MIME_VERSION="header_exists(MIME-Version)";
+#$__CT_TEXT_PLAIN="Content-Type=/^text\/plain\b/iH";
+$__CT_TEXT_PLAIN="content_type_is_type(text) & content_type_is_subtype(plain)";
+$MIME_HEADER_CTYPE_ONLY="!${__CD} & !${__CTE} & ${__CT} & !${__MIME_VERSION} & !${__CT_TEXT_PLAIN}";
+
+$__HAS_MSMAIL_PRI="header_exists(X-MSMail-Priority)";
+$__HAS_MIMEOLE="header_exists(X-MimeOLE)";
+$__HAS_SQUIRRELMAIL_IN_MAILER="X-Mailer=/SquirrelMail\b/H";
+$MISSING_MIMEOLE="${__HAS_MSMAIL_PRI} & !${__HAS_MIMEOLE} & !${__HAS_SQUIRRELMAIL_IN_MAILER}";
+$__MSGID_DOLLARS_OK="Message-Id=/[0-9a-f]{4,}\$[0-9a-f]{4,}\$[0-9a-f]{4,}\@\S+/Hr";
+$__MIMEOLE_MS="X-MimeOLE=/^Produced By Microsoft MimeOLE/H";
+$__RCVD_WITH_EXCHANGE="Received=/with Microsoft Exchange Server/H";
+$RATWARE_MS_HASH="${__MSGID_DOLLARS_OK} & !${__MIMEOLE_MS} & !${__RCVD_WITH_EXCHANGE}";
+$STOX_REPLY_TYPE="Content-Type=/text\/plain; .* reply-type=original/H";
+$__FHELO_VERIZON="X-Spam-Relays-Untrusted=/^[^\]]+ helo=[^ ]+verizon\.net /iH";
+$__FHOST_VERIZON="X-Spam-Relays-Untrusted=/^[^\]]+ rdns=[^ ]+verizon\.net /iH";
+$FM_FAKE_HELO_VERIZON="${__FHELO_VERIZON} & !${__FHOST_VERIZON}";
+$__AT_YAHOO_MSGID="Message-Id=/\@yahoo\.com\b/iH";
+$__FROM_YAHOO_COM="From=/\@yahoo\.com\b/iH";
+$FORGED_MSGID_YAHOO="${__AT_YAHOO_MSGID} & !${__FROM_YAHOO_COM}";
+
+$__THEBAT_MUA_V1="X-Mailer=/^The Bat! \(v1\./H";
+$__CTYPE_HAS_BOUNDARY="Content-Type=/boundary/iH";
+$__BAT_BOUNDARY="Content-Type=/boundary=\"?-{10}/H";
+$__MAILMAN_21="X-Mailman-Version=/\d/H";
+$__DOUBLE_IP_SPAM_1="Received=/from \[\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\] by \d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3} with/H";
+$__DOUBLE_IP_SPAM_2="Received=/from\s+\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\s+by\s+\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3};/H";
+$FORGED_MUA_THEBAT_BOUN="${__THEBAT_MUA_V1} & ${__CTYPE_HAS_BOUNDARY} & !${__BAT_BOUNDARY} & !${__MAILMAN_21}";
+$RCVD_DOUBLE_IP_SPAM="${__DOUBLE_IP_SPAM_1} | ${__DOUBLE_IP_SPAM_2}";
+
+$__REPTO_QUOTE="Reply-To=/\".*\"\s*\</H";
+$__FROM_YAHOO_COM="From=/\@yahoo\.com\b/iH";
+$__AT_YAHOO_MSGID="Message-Id=/\@yahoo\.com\b/iH";
+$REPTO_QUOTE_YAHOO="${__REPTO_QUOTE} & (${__FROM_YAHOO_COM} | ${__AT_YAHOO_MSGID})";
+
+
+$__XM_GNUS="X-Mailer=/^Gnus v/H";
+$__XM_MSOE5="X-Mailer=/^Microsoft Outlook Express 5/H";
+$__XM_MSOE6="X-Mailer=/^Microsoft Outlook Express 6/H";
+$__XM_MOZ4="X-Mailer=/^Mozilla 4/H";
+$__XM_SKYRI="X-Mailer=/^SKYRiXgreen/H";
+$__XM_WWWMAIL="X-Mailer=/^WWW-Mail \d/H";
+$__UA_GNUS="User-Agent=/^Gnus/H";
+$__UA_KNODE="User-Agent=/^KNode/H";
+$__UA_MUTT="User-Agent=/^Mutt/H";
+$__UA_PAN="User-Agent=/^Pan/H";
+$__UA_XNEWS="User-Agent=/^Xnews/H";
+$__NO_INR_YES_REF="${__XM_GNUS} | ${__XM_MSOE5} | ${__XM_MSOE6} | ${__XM_MOZ4} | ${__XM_SKYRI} | ${__XM_WWWMAIL} | ${__UA_GNUS} | ${__UA_KNODE} | ${__UA_MUTT} | ${__UA_PAN} | ${__UA_XNEWS}";
+
+$__SUBJ_RE="Subject=/^R[eE]:/H";
+$__HAS_REF="header_exists(References)";
+$__MISSING_REF="!${__HAS_REF}";
+$FAKE_REPLY_C="${__SUBJ_RE} & ${__MISSING_REF} & ${__NO_INR_YES_REF}";
+
diff --git a/conf/html.inc b/conf/html.inc
new file mode 100644 (file)
index 0000000..997b337
--- /dev/null
@@ -0,0 +1,28 @@
+# HTML related rules
+
+$__MIME_HTML="content_type_is_type(text) & content_type_is_subtype(/.?html/)";
+$__TAG_EXISTS_HTML="/<html/iPr";
+$__TAG_EXISTS_HEAD="/<head>/iPr";
+$__TAG_EXISTS_META="/<meta /iPr";
+$__TAG_EXISTS_BODY="/<body/iPr";
+$FORGED_OUTLOOK_TAGS="!${__YAHOO_BULK} & ${__ANY_OUTLOOK_MUA} & ${__MIME_HTML} & !(${__TAG_EXISTS_HTML} & ${__TAG_EXISTS_HEAD} & ${__TAG_EXISTS_META} & ${__TAG_EXISTS_BODY})";
+$__HTML_LENGTH_1024_1536="has_content_part_len('text', 'html', 1024, 1536)";
+$__HTML_LINK_IMAGE="/<img /iPr";
+$HTML_SHORT_LINK_IMG_2="${__HTML_LENGTH_1024_1536} & ${__HTML_LINK_IMAGE}";
+
+$__R_BGCOLOR="/BGCOLOR=/iM";
+$__R_FONT_COLOR="/font color=[\"']?\#FFFFFF[\"']?/iM";
+$R_WHITE_ON_WHITE="(!${__R_BGCOLOR} & ${__R_FONT_COLOR})";
+$R_NO_SPACE_IN_FROM="From=/\S<[-\w\.]+\@[-\w\.]+>/X";
+$R_FLASH_REDIR_IMGSHACK="/^(?:http:\/\/)?img\d{1,5}\.imageshack\.us\/\S+\.swf/U";
+$__R_RCVD_FROM_VALUEHOST="Received=/\sb0\.valuehost\.ru/H";
+$__R_CYR_PHONE="/8 \(\xD799\)/P";
+
+$R_SPAM_FROM_VALUEHOST="${__R_RCVD_FROM_VALUEHOST} & ${__R_CYR_PHONE}";
+
+$__R_MEGA_TABLE = "/<table border=\"0\" width=\"1\" height=\"30\">\n\s+?<tr>\n\s+?<td bgcolor=\"\#000000\"><\/td>\n\s+?<td><\/td>\n\s+?<td bgcolor=\"\#000000\"><\/td>\n\s+?<td><\/td>\n\s+?<td bgcolor=\"\#000000\"><\/td>\n\s+?<td><\/td>\n\s+?<td bgcolor=\"\#000000\"><\/td>\n\s+?<td><\/td>\n\s+?<td bgcolor=\"\#000000\"><\/td>\n\s+?<td><\/td>\n\s+?<td bgcolor=\"\#000000\"><\/td>\n\s+?<td><\/td>\n\s+?<td><\/td>\n\s+?<td><\/td>/Ps";
+$__R_GREEK_SYMBOLS = "/&\#(?:1293|261|1218|1197|1245|1187|1116|569|1117|267|1179|593|1008|1247|311|491)/P";
+
+$MIME_HTML_ONLY="has_only_html_part()";
+$FORGED_OUTLOOK_HTML="!${__YAHOO_BULK} & ${__ANY_OUTLOOK_MUA} & ${MIME_HTML_ONLY}";
+
diff --git a/conf/lotto.inc b/conf/lotto.inc
new file mode 100644 (file)
index 0000000..78ffdea
--- /dev/null
@@ -0,0 +1,16 @@
+# Rules for lottery spam
+
+
+$__R_LOTTO_FROM="From=/(?:lottery|News center|congratulation to you|NED INFO|BRITISH NATIONAL HEADQUATERS|MICROSOFT ON LINE SUPPORT TEAM|prize|online notification)/iH";
+$__R_LOTTO_SUBJECT="Subject=/(?:\xA3\d|pounds?|FINAL NOTIFICATION|FOR YOUR ATTENTION|File in Your Claims?|ATTN|prize|Claims requirement|amount|confirm|your e-mail address won|congratulations)/iH";
+$__R_LOTTO_BODY="/(?:won|winning|\xA3\d|pounds?|GBP|LOTTERY|awards|prize)/isrP";
+$__KAM_LOTTO1="/(e-?mail address (have emerged a winner|has won|attached to (ticket|reference)|was one of the ten winners)|random selection in our computerized email selection system)/isrP";
+$__KAM_LOTTO2="/((ticket|serial|lucky) number|secret pin ?code|batch number|reference number|promotion date)/isrP";
+$__KAM_LOTTO3="/(won|claim|cash prize|pounds? sterling)/isrP";
+$__KAM_LOTTO4="/(claims (officer|agent)|lottery coordinator|fiduciary (officer|agent)|fiduaciary claims)/isrP";
+$__KAM_LOTTO5="/(freelotto group|Royal Heritage Lottery|UK National (?:Online )?Lottery|U\.?K\.? Grand Promotions|Lottery Department UK|Euromillion Loteria|Luckyday International Lottery|International Lottery)/isrP";
+$__KAM_LOTTO6="/(Dear Lucky Winner|Winning Notification|Attention:Winner|Dear Winner)/isrP";
+$__KAM_LOTTO7="Subject=/(Your Lucky Day|(Attention:|ONLINE) WINNER)/iH";
+$KAM_LOTTO1="(${R_UNDISC_RCPT} | ${R_BAD_CTE_7BIT} | ${R_NO_SPACE_IN_FROM}) & regexp_match_number(3, ${__KAM_LOTTO1}, ${__KAM_LOTTO2}, ${__KAM_LOTTO3}, ${__KAM_LOTTO4}, ${__KAM_LOTTO5}, ${__KAM_LOTTO6}, ${__KAM_LOTTO7})";
+$R_LOTTO="(${R_UNDISC_RCPT} | ${R_BAD_CTE_7BIT} | ${R_NO_SPACE_IN_FROM}) & regexp_match_number(3, ${__KAM_LOTTO1},  ${__KAM_LOTTO2}, ${__KAM_LOTTO3}, ${__KAM_LOTTO4}, ${__KAM_LOTTO5}, ${__KAM_LOTTO6})";
+
diff --git a/conf/surbl-whitelist.inc b/conf/surbl-whitelist.inc
new file mode 100644 (file)
index 0000000..de0dfa4
--- /dev/null
@@ -0,0 +1,23 @@
+#
+# $Id: rspamd-whitelist.conf,v 1.1 2009/06/11 12:25:37 dmx Exp $
+#
+1gost.info
+autorambler.ru
+easylnk.com
+google.ru
+li.ru
+list.ru
+liveinternet.ru
+msn.com
+nod32.com
+price.ru
+rambler-co.ru
+rambler.ru
+subscribe.ru
+tns-counter.ru
+top4top.ru
+wordpress.com
+yandex.ru
+yandex.net
+youtube.com
+walmart.com
index a1f6675114eb5c6888e76095607cb0d0017e0803..e608abdbd4416235e4c506a68f4252b2880f65f7 100755 (executable)
@@ -43,13 +43,13 @@ sub parse_config {
 
     my $ctrl = 0, $skip = 0;
     while (<CONF>) {
-        if ($_ =~ /control\s*{/i) {
+        if ($_ =~ /^.*type.*=.*controller.*$/i) {
             $ctrl = 1;
         }
         if ($ctrl && $_ =~ /}/) {
             $ctrl = 0;
         }
-        if ($_ =~ /lmtp\s*{/i || $_ =~ /delivery\s*{/i) {
+        if ($_ =~ /^.*type.*=.*(?:lmtp|delivery).*$/i) {
             $skip = 1;
         }
         if ($skip && $_ =~ /}/) {
index 42cf9d2a5a302c0ae8074ce8270ccc65408bf08d..23f31353a205a7e3b2734aecba2088fb50a02690 100644 (file)
@@ -2,10 +2,16 @@
 # $Id$
 #
 
+.include @CMAKE_INSTALL_PREFIX@/etc/rspamd/drugs.inc
+.include @CMAKE_INSTALL_PREFIX@/etc/rspamd/lotto.inc
+.include @CMAKE_INSTALL_PREFIX@/etc/rspamd/fraud.inc
+.include @CMAKE_INSTALL_PREFIX@/etc/rspamd/headers.inc
+.include @CMAKE_INSTALL_PREFIX@/etc/rspamd/html.inc
+
 
 # pidfile - path to pid file
 # Default: pidfile = /var/run/rspamd.pid
-pidfile = "./rspamd.pid";
+pidfile = "/var/run/rspamd.pid";
 
 
 worker {
@@ -46,7 +52,7 @@ logging {
        # Log type can be: console, syslog and file
        log_type = console;
        # Log level can be: DEBUG, INFO, WARN and ERROR
-       log_level = DEBUG;
+       log_level = INFO;
        # Log facility specifies facility for syslog logging, see syslog (3) for details
        # log_facility = "LOG_MAIL";
        
@@ -60,61 +66,128 @@ statfile_pool_size = 40M;
 
 
 # Sample statfile definition
-statfile {
+#statfile {
        # Alias is used for learning and is used as symbol
-       alias = "test.spam";
+       #alias = "test.spam";
        # Pattern is path to file, can include %r - recipient name and %f - mail from value
-       pattern = "./test.spam";
+       #pattern = "./test.spam";
        # Weight in spam/ham classifier
-       weight = 1.0;
+       #weight = 1.0;
        # Size of this statfile class
-       size = 10M;
+       #size = 10M;
        # Tokenizer for this statfile
        # Deafault: osb-text
-       tokenizer = "osb-text";
-};
-statfile {
-       alias = "test.ham";
-       pattern = "./test.ham";
-       weight = -2.0;
-       size = 10M;
-};
+       #tokenizer = "osb-text";
+#};
+#statfile {
+       #alias = "test.ham";
+       #pattern = "./test.ham";
+       #weight = -2.0;
+       #size = 10M;
+#};
 
 # Factors coefficients
 factors {
-       "SURBL_MULTI" = 10.5;
-       "winnow" = 5.5;
-};
+    # SURBL's
+       "SC_SURBL_MULTI" = 5.5;
+       "WS_SURBL_MULTI" = 5.5;
+       "PH_SURBL_MULTI" = 5.5;
+       "OB_SURBL_MULTI" = 5.5;
+       "AB_SURBL_MULTI" = 5.5;
+       "JP_SURBL_MULTI" = 5.5;
+       "RAMBLER_URIBL" = 10.5;
+    
+    # Regexp factors
+       "R_TINYURL" = 2;
+       "R_FREE_HOSTING" = 4;
+       "R_FREE_HOSTING_NAROD" = 3;
+       "R_WWW_EKONF_COM" = 10;
+       "SUBJECT_NEEDS_ENCODING" = 1;
 
+       "R_POCHTA_RU" = 10;
+       "R_BAD_CTE_7BIT" = 6;
+       "R_UNDISC_RCPT" = 5;
+       "MISSING_MID" = 3;
+       "R_RCVD_SPAMBOTS" = 3;
+       "R_TO_SEEMS_AUTO" = 3;
+       "R_MISSING_CHARSET" = 5;
+       "R_SAJDING" = 8;
+       "R_FORGED_MPOP_WEBMAIL" = 8;
+       "R_WHITE_ON_WHITE" = 9;
+       "R_NO_SPACE_IN_FROM" = 3;
+       "R_FLASH_REDIR_IMGSHACK" = 10;
+       "R_SPAM_FROM_VALUEHOST" = 10;
+       "R_SPAM_FROM_MTU" = 8;
+       "R_SPAM_FROM_ONO" = 10;
+       "R_SPAM_FROM_VERSATEL" = 10;
+       "R_SPAM_FROM_LIBERO" = 10;
+       "R_FAKE_OUTLOOK"= 8;
+       "R_FAKE_THEBAT"= 8;
+       "R_LOTTO" = 5;
+       "KAM_LOTTO1" = 7;
+       "FORGED_OUTLOOK_HTML" = 5;
+       "SUSPICIOUS_RECIPS" = 3.5;
+       "HTML_TAG_BALANCE_HEAD" = 5;
+       "SORTED_RECIPS" = 3.5;
+       "TRACKER_ID" = 3.843;
+       "ADVANCE_FEE_2" = 3.300;
+       "ADVANCE_FEE_3" = 2.121;
+       "FORGED_MUA_OUTLOOK" = 3;
+       "FORGED_OUTLOOK_TAGS" = 2.099;
+       "HTML_SHORT_LINK_IMG_2" = 3;
+       "INVALID_MSGID" = 5;
+       "HTML_MIME_NO_HTML_TAG" = 2;
+       "MIME_HEADER_CTYPE_ONLY" = 2;
+       "MISSING_MIMEOLE" = 5;
+       "MISSING_SUBJECT" = 2;
+       "RATWARE_MS_HASH" = 2;
+       "STOX_REPLY_TYPE" = 1;
+       "FM_FAKE_HELO_VERIZON" = 2;
+       "FORGED_MSGID_YAHOO" = 2;
+       "FORGED_MUA_THEBAT_BOUN" = 2;
+       "RCVD_DOUBLE_IP_SPAM" = 2;
+       "REPTO_QUOTE_YAHOO" = 2;
+       "DRUGS_DIET" = 2;
+       "DRUGS_ERECTILE" = 2;
+       "DRUGS_ANXIETY" = 2;
+       "DRUGS_ANXIETY_EREC" = 2;
+       "DRUGS_MANYKINDS" = 2;
+       "FAKE_REPLY_C" = 6;
+       "MIME_HTML_ONLY" = 1;
+    
+    # Modules factors
+    "R_MIXED_CHARSET" = 5;
+    "R_BAD_EMAIL" = 10.5;
+};
 # Options for lmtp worker
-worker {
-       type = "lmtp";
+#worker {
+       #type = "lmtp";
        # Bind socket for lmtp interface
-       bind_socket = localhost:11335;
+       #bind_socket = localhost:11335;
        # Metric that is considered as main. If we have spam result on
        # this metric, lmtp delivery would be failed
-       metric = "default";
+       #metric = "default";
        # Number of lmtp workers
-       count = 1;
-};
+       #count = 1;
+#};
 
 #worker {
-#      type = "delivery";
+       #type = "delivery";
        # Path to delivery agent, %f is expanded as mail from address and %r 
        # is expanded as recipient address
        # Expample: agent = "/usr/local/bin/procmail -f %f -d %r"
-#      agent = "/dev/null";
+       #agent = "/dev/null";
        # Bind socket for lmtp interface
        # Example: bind_socket = localhost:25
        
        # Whether we should use lmtp for MTA delivery
-#      lmtp = no;
+       #lmtp = no;
 #};
 
 # SURBL module params, note that single quotes are mandatory here
 .module 'surbl' {
        # Address to redirector in host:port format
-       redirector = "localhost:8080";
+       #redirector = "localhost:8080";
        # Connect timeout for redirector
        redirector_connect_timeout = "1s";
        # IO timeout for redirector (may be usefull to set this value rather big)
@@ -136,30 +209,80 @@ worker {
        # Metric for surbl module
        metric = "default";
        # List of public known hostings (for which we should use 3 components of domain name instead of 2)
-       2tld = "file:///some/path/file";
+       2tld = "file://@CMAKE_INSTALL_PREFIX@/etc/rspamd/2tld.inc";
        # Whitelisted urls
-       whitelist = "file:///some/other/file";
+       whitelist = "file://@CMAKE_INSTALL_PREFIX@/etc/rspamd/surbl-whitelist.inc";
 };
 
-
-$to_blah = "To=/\"blah@blah\"/H";
-$from_blah = "From=/blah@blah/H";
-$subject_blah = "Subject=/blah/H";
-
 .module 'regexp' {
-       BLAH_SYMBOL = "${to_blah} & !(${from_blah} | ${subject_blah})";
+       R_TINYURL = "${R_TINYURL}";
+       R_FREE_HOSTING = "${R_FREE_HOSTING}";
+       R_WWW_EKONF_COM = "${R_WWW_EKONF_COM}";
+       R_FREE_HOSTING_NAROD = "${R_FREE_HOSTING_NAROD}";
+       SUBJECT_NEEDS_ENCODING = "${SUBJECT_NEEDS_ENCODING}";
+
+       R_POCHTA_RU = "${R_POCHTA_RU}";
+       R_BAD_CTE_7BIT="${R_BAD_CTE_7BIT}";
+       R_UNDISC_RCPT="${R_UNDISC_RCPT}";
+       MISSING_MID="${MISSING_MID}";
+       R_RCVD_SPAMBOTS="${R_RCVD_SPAMBOTS}";
+       R_TO_SEEMS_AUTO="${R_TO_SEEMS_AUTO}";
+       R_SAJDING="${R_SAJDING}";
+       R_FORGED_MPOP_WEBMAIL="${R_FORGED_MPOP_WEBMAIL}";
+       R_WHITE_ON_WHITE="${R_WHITE_ON_WHITE}";
+       R_NO_SPACE_IN_FROM="${R_NO_SPACE_IN_FROM}";
+       R_FLASH_REDIR_IMGSHACK="${R_FLASH_REDIR_IMGSHACK}";
+       R_SPAM_FROM_VALUEHOST="${R_SPAM_FROM_VALUEHOST}";
+       R_SPAM_FROM_MTU="${R_SPAM_FROM_MTU}";
+       R_SPAM_FROM_ONO="${R_SPAM_FROM_ONO}";
+       R_SPAM_FROM_VERSATEL="${R_SPAM_FROM_VERSATEL}";
+       R_SPAM_FROM_LIBERO="${R_SPAM_FROM_LIBERO}";
+       R_FAKE_OUTLOOK="${R_FAKE_OUTLOOK}";
+       R_FAKE_THEBAT="${R_FAKE_THEBAT}";
+       R_MISSING_CHARSET="${R_MISSING_CHARSET}";
+       R_LOTTO="${R_LOTTO}";
+       KAM_LOTTO1="${KAM_LOTTO1}";
+       FORGED_OUTLOOK_HTML="${FORGED_OUTLOOK_HTML}";
+       SUSPICIOUS_RECIPS="${SUSPICIOUS_RECIPS}";
+       SORTED_RECIPS="${SORTED_RECIPS}";
+       TRACKER_ID="${TRACKER_ID}";
+       ADVANCE_FEE_2="${ADVANCE_FEE_2}";
+       ADVANCE_FEE_3="${ADVANCE_FEE_3}";
+       FORGED_MUA_OUTLOOK="${FORGED_MUA_OUTLOOK}";
+       FORGED_OUTLOOK_TAGS="${FORGED_OUTLOOK_TAGS}";
+       HTML_SHORT_LINK_IMG_2="${HTML_SHORT_LINK_IMG_2}";
+       INVALID_MSGID="${INVALID_MSGID}";
+       HTML_MIME_NO_HTML_TAG="${HTML_MIME_NO_HTML_TAG}";
+       MIME_HEADER_CTYPE_ONLY="${MIME_HEADER_CTYPE_ONLY}";
+       MISSING_MIMEOLE="${MISSING_MIMEOLE}";
+       MISSING_SUBJECT="${MISSING_SUBJECT}";
+       RATWARE_MS_HASH="${RATWARE_MS_HASH}";
+       STOX_REPLY_TYPE="${STOX_REPLY_TYPE}";
+       FM_FAKE_HELO_VERIZON="${FM_FAKE_HELO_VERIZON}";
+       FORGED_MSGID_YAHOO="${FORGED_MSGID_YAHOO}";
+       FORGED_MUA_THEBAT_BOUN="${FORGED_MUA_THEBAT_BOUN}";
+       RCVD_DOUBLE_IP_SPAM="${RCVD_DOUBLE_IP_SPAM}";
+       REPTO_QUOTE_YAHOO="${REPTO_QUOTE_YAHOO}";
+       DRUGS_DIET="${DRUGS_DIET}";
+       DRUGS_ERECTILE="${DRUGS_ERECTILE}";
+       DRUGS_ANXIETY="${DRUGS_ANXIETY}";
+       DRUGS_ANXIETY_EREC="${DRUGS_ANXIETY_EREC}";
+       DRUGS_MANYKINDS="${DRUGS_MANYKINDS}";
+       
+       FAKE_REPLY_C="${FAKE_REPLY_C}";
+       MIME_HTML_ONLY="${MIME_HTML_ONLY}";
 };
 
 .module 'chartable' {
        metric = "default";
-       symbold = "R_MIXED_CHARSET";
+       symbol = "R_MIXED_CHARSET";
        threshold = "0.1";
 };
 
 .module 'emails' {
     metric = "default";
-       symbold = "R_BAD_EMAIL";
-    blacklist = "file:///some/path/emails.lst";
+       symbol = "R_BAD_EMAIL";
+    #blacklist = "file:///some/path/emails.lst";
 };
 
 # If enables threat each regexp as raw regex and do not try to convert