diff options
author | Vsevolod Stakhov <vsevolod@rambler-co.ru> | 2009-06-24 17:09:57 +0400 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@rambler-co.ru> | 2009-06-24 17:09:57 +0400 |
commit | a99a4bf8d2241b19b70c16fa12f3ed7f3f96ae68 (patch) | |
tree | 41843c09bb74b549a0ff5b6b82bd33849fe192cb | |
parent | e454ec3f7b08364f0b884692d80ea4b1b95e6901 (diff) | |
download | rspamd-a99a4bf8d2241b19b70c16fa12f3ed7f3f96ae68.tar.gz rspamd-a99a4bf8d2241b19b70c16fa12f3ed7f3f96ae68.zip |
* Rework structure of sample configs
* Fix rspamc
* Add english readme
-rw-r--r-- | CMakeLists.txt | 5 | ||||
-rw-r--r-- | README.en.txt | 170 | ||||
-rw-r--r-- | conf/2tld.inc | 954 | ||||
-rw-r--r-- | conf/drugs.inc | 68 | ||||
-rw-r--r-- | conf/fraud.inc | 56 | ||||
-rw-r--r-- | conf/headers.inc | 167 | ||||
-rw-r--r-- | conf/html.inc | 28 | ||||
-rw-r--r-- | conf/lotto.inc | 16 | ||||
-rw-r--r-- | conf/surbl-whitelist.inc | 23 | ||||
-rwxr-xr-x | rspamc.pl.in | 4 | ||||
-rw-r--r-- | rspamd.conf.sample | 201 |
11 files changed, 1651 insertions, 41 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 676119827..c13721a4a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -393,6 +393,7 @@ ENDIF(ENABLE_PERL MATCHES "ON") CONFIGURE_FILE(config.h.in src/config.h) CONFIGURE_FILE(rspamc.pl.in rspamc.pl @ONLY) +CONFIGURE_FILE(rspamd.conf.sample conf/rspamd.conf.sample @ONLY) ADD_EXECUTABLE(rspamd ${RSPAMDSRC} ${CONTRIBSRC} ${TOKENIZERSSRC} ${CLASSIFIERSSRC} ${PLUGINSSRC} ${YACC_OUTPUT} @@ -454,8 +455,12 @@ ENDIF(LIBUTIL_LIBRARY) TARGET_LINK_LIBRARIES(utils/expression-parser ${GLIB2_LIBRARIES}) TARGET_LINK_LIBRARIES(utils/expression-parser ${GMIME2_LIBRARIES}) + INSTALL(PROGRAMS rspamd-${RSPAMD_VERSION} DESTINATION bin RENAME rspamd) INSTALL(PROGRAMS rspamc.pl DESTINATION bin RENAME rspamc) +INSTALL(CODE "EXECUTE_PROCESS(COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_INSTALL_PREFIX}/etc/rspamd)") +INSTALL(DIRECTORY conf/ DESTINATION etc/rspamd) + IF(ENABLE_PERL MATCHES "ON") INSTALL(CODE "EXECUTE_PROCESS(COMMAND make install WORKING_DIRECTORY perl)") ENDIF(ENABLE_PERL MATCHES "ON") diff --git a/README.en.txt b/README.en.txt new file mode 100644 index 000000000..2bfd33faf --- /dev/null +++ b/README.en.txt @@ -0,0 +1,170 @@ +API. +=========== + +The rspamd API is described in the Doxygen documentation. + +Logic of operation of rspamd filters. +============================== + +1) All filters are registered in the config file in the description of filter chains: +header_filters = "regexp, my_func" +where a filter name is either the name of a C module or the name of a script (lua or perl) function +Types of filters: +* header_filters - filters for headers +* mime_filters - filters for every mime part +* message_filters - filters for the whole message without mime parsing +* url_filters - filters for URLs in messages + +Filters register their results in metrics. + +2) A metric is a symbolic value in which filters register their results. +There is one metric by default - "default".
+For each metric there is a special consolidation function which calculates coefficients +of results according to the internal logic of correspondence between symbols and coefficients. +By default this function is a simple sum that can be configured in the configuration file: + +# The factors block +factors { + # For example, "SURBL_DNS" =5.0 + "SYMBOL_NAME" = coefficient; +}; + +It is also possible to register a special consolidation function for a metric: + +metric { + name = "test_metric"; + function = "some_function"; + required_score = 20.0; +}; + + +The protocol. +========= + +Answer format: +SPAMD/1.1 0 EX_OK + \/ \/ \/ + Version Code Errors +Spam: False; 2 / 5 +This format is compatible with sa-spamd (without metrics) + +New format of the answer: +RSPAMD/1.0 0 EX_OK +Metric: Name; Spam_Result; Spam_Mark / Spam_Mark_Required +Metric: Name2; Spam_Result2; Spam_Mark2 / Spam_Mark_Required2 + +There can be several Metric headers. +Format of symbols output: +SYMBOL1, SYMBOL2, SYMBOL3 - sa-spamd compatible format +Symbol: Name; Param1, Param2, Param3 - rspamd format + +The request format: +PROCESS SPAMC/1.2 +\/ \/ +Command Version + +SPAMC - protocol compatible with sa-spamd +RSPAMC - new rspamd protocol +In either operating mode the following headers are supported: +Content-Length - Length of the message +Helo - HELO, received from the client +From - MAIL FROM +IP - IP of the client +Recipient-Number - Number of recipients +Rcpt - the recipient +Queue-ID - The queue identifier + +These values can be used in rspamd filters.
+ +Regular expressions +==================== + +Regular expressions are defined in the regexp module: +.module ' regexp ' { + SYMBOL = "regexp_expression"; +}; +header_filters = "regexp"; + +Format of regular expression: +"/pattern/flags" +For header lines there is also a special regexp form: +headername =/pattern/flags + +Flags of regexp: +i, m, s, x, u, o - same as in perl/pcre +r - raw regexp, not encoded in utf8 +H - searches for a header +M - searches in undecoded message +P - searches in decoded mime parts +U - searches in urls +X - searches in undecoded headers + +An expression can contain regular expressions, functions, logic operators and brackets: +SOME_SYMBOL = "To =/blah@blah/H & !(From =/blah@blah/H | Subject =/blah/H)" + +It is also possible to use variables: +$to_blah = "To =/blah@blah/H"; +$from_blah = "From =/blah@blah/H"; +$subject_blah = "Subject =/blah/H"; + +Then the previous expression becomes: + +SOME_SYMBOL = "${to_blah} & !(${from_blah} | ${subject_blah})" + +Logic expressions in rspamd +=========================== + +Expressions containing regular expressions, functions, logic operations and brackets can be used +for filtering. General rules: +- Logic operations are boolean "AND": ' & ', boolean "OR": ' | ' and boolean negation: '! '. +- Priority of logic operations: &| -> !; brackets can be used to change priority: + (A & !B) | !(C|D) +- Whitespace characters in expressions are ignored +- An operand of the form /re/args or string =/re/args is considered a regular expression; in regular +expressions all symbols ' / ' and ' "' should be escaped by a symbol ' \', but the symbol '\' does not need to be escaped. +- An operand which accepts arguments is considered a function. Arguments of a function can be expressions, regexps or other functions. +Arguments of a function are evaluated from left to right.
+- There are a number of built-in functions: + * header_exists - accepts a header's name as argument, returns true if such a header exists + * compare_parts_distance - accepts as argument a number from 0 to 100 which reflects the difference in percent + between message parts. The function works with messages containing 2 text parts (text/plain and text/html) and + returns true when these parts differ by more than N percent. If the argument is not specified, + the function searches for completely different parts. + * compare_transfer_encoding - compares Content-Transfer-Encoding with the argument + * content_type_compare_param - compares a Content-Type param with a regular expression or string: + content_type_compare_param (Charset, /windows-\d+/) + content_type_compare_param (Charset, ascii) + * content_type_has_param - checks for the specified Content-Type parameter + * content_type_is_subtype - compares the subtype of content-type to a regular expression or string + * content_type_is_type - compares the type of content-type to a regular expression or string + content_type_is_type (text) + content_type_is_subtype (/?.html/) + * regexp_match_number - accepts a number as the first parameter, followed by a list of expressions. + If the number of matched expressions is more than the first argument, the function returns TRUE, for example: + regexp_match_number (2, ${__RE1}, ${__RE2}, header_exists (Subject)) + * has_only_html_part - returns TRUE if there is only an HTML part in the message + * compare_recipients_distance - calculates the percentage of similar recipients of the message. Accepts one argument - a threshold + percentage of similar recipients. + * is_recipients_sorted - returns TRUE if the list of recipients is sorted (works only if the number of recipients >= 5). + * is_html_balanced - returns TRUE if tags in all html parts are balanced + * has_html_tag - returns TRUE if the specified html tag is found + +The module chartable.
+================ + +The module is intended for search of words with the mixed symbols, for example: +kашa - a part in a Latin, and a part in Cyrillics. +Module parametres: + +.module ' chartable ' { + metric = "default"; + symbold = "R_MIXED_CHARSET"; + threshold = "0.1"; +}; + +threshold is a relation of transitions between codings to total number of symbols in words, for example, we have a word +"kаша" (the first letter Latin), then total number of transitions - 3, and number of transitions between codings - 1, then +The relation - 1/3. + +For inclusion of the module he is necessary for adding in the list mime_filters: +mime_filters = "chartable"; diff --git a/conf/2tld.inc b/conf/2tld.inc new file mode 100644 index 000000000..299f96229 --- /dev/null +++ b/conf/2tld.inc @@ -0,0 +1,954 @@ +# +# $Id: rspamd-2tld.conf,v 1.3 2009/06/17 15:01:17 dmx Exp $ +# +infobox.ru +free.bg +mirohost.net +007sites.com +55fast.com +5nxs.com +freehost10.com +hostaim.com +iwebsource.com +yourfreehosting.net +freehostingz.com +io.ua +3eu.ru + +intway.info +intwayblog.net +ruprom.net + +lpchat.com +szm.com +gmxhome.de +freewaywebhost.com +sdsmt.edu +ifrance.com +100webspace.net +jimdo.com +gn8.net +siteedit.su +se-ua.net + +googlegroups.com + +digitalzones.com +arcadepages.com +builtfree.org +angelcities.com +freehostyou.com +freesite.org +freecities.com +freewebpages.org +ibnsites.com + +samomu.ru +blog-nn.ru + +byethost24.com +hostia.ru +times.lv +z8.ru +1gb.ru +url-site.com +911mb.com +tushino.com + +sp.ru + +omp9.com + +1freewebspace.com +freewhost.com +na.by +gq.nu +5u.com +hy.cz +8k.com + +nichost.ru + +qwerty.su +qwerty.in + +10fast.net +0fees.net +netsolhost.com +webs.com +we.bs +wz.cz +go9.ru +com.ua + +gorodok.net +republika.pl +interia.pl +nazwa.pl + +infostore.org +sapo.pt + +promzone.ru + +# Services for make URL's +notlong.com + +# internet shops +wowex.ru +webasyst.net + +# ayola.net free hosting +md6.ru +ex6.ru +w6.ru +sk6.ru +md8.ru +z16.ru +cwx.ru + +# hostland.ru 
free hosting +tu2.ru + +freetzi.com +coolpage.biz + +# free a1free.net hosting +a1free.net + +# free blog-o-hosting +byethost2.com +byethost3.com +byethost4.com +byethost5.com +byethost6.com +byethost7.com +byethost8.com +byethost9.com +byethost10.com +byethost11.com +byethost12.com +byethost13.com +byethost14.com +byethost15.com +byethost16.com +byethost17.com +byethost18.com + +# free lunatic hosting +aecru.org + +tw1.ru +jino.ru +ru.gg + +# http://www.de.gd/millired/home.html +eu.ki +cool.lc +m.vu +de.pl +at.st +ch.st +seite.com +mobi.ps +biz.ps +1x.net +seite.info +deutschland.nu +bilder.net +flirten.info +seite.name +seite.in +seite.cz +seite.es +seite.pl +seite.ru +seite.im +seite.li +seite.lt +seite.lv +seite.sc +seite.st +seite.vc +infos.lc +deutschland.lc +homepage.lc +auto.lc +musik.lc +urlaub.lc +musik.cx +germany.cx +homepage.cx +infos.cx +cool.hn +shopping.hn +tipp.cz +gmbh.tw +de.im +top8.com +game.lc +start.lc +xx.lc +hp.lc +on.lc +portal.lc +faq.lc +page.lc +home.lc +domain.lc +spiele.lc +free.lc +kostenlos.lc +test.lc +privat.lc +dvd.lc +pc.lc +fotos.lc +top.lc +hot.lc +gratis.lc +forum.lc +bilder.lc +reise.lc +liebe.lc +24.lc +vz.lc +links.lc +service.lc +club.lc +shops.lc +da.cx +eur.lc +euro.lc +europa.lc +fehler.in +bund.in +hase.in +teufel.in +hexe.in +mitglieder.in +voten.in +inserat.in +smilie.in +papst.in +foren.in +politik.in +vertrieb.in +anschauen.in +finanzen.in +redner.in +esel.in +hund.in +sport.dj +clandomain.de +clandomain.org +cl4n.org +l4n.org +cs-clan.org +kr3w.de +te4m.de +thelan.info +download.ac +fun.gg +download.sh +download.je +downloads.lc +clan.lc +clan.mn +uk.nf +web.gg +k1.cx +gb.nf +us.nf +usa.gg +ru.nf +blog.lc +spam.lc +about.lc +user.lc +xl.lc +xxl.lc +all.lc +2.je +4.je +6.je +7.je +8.je +9.je +eu.gp +de.gp +deutsch.lc +english.lc +francais.lc +espanol.lc +italiano.lc +portugues.lc +dansk.lc +nederlands.lc +polski.lc +Norsk.lc +svenska.lc +turkish.lc +chinese.lc +suomi.lc +japanese.lc +russian.lc +greek.lc 
+oesterreich.lc +schweiz.lc +nederland.lc +zx9.de +aw3.de +com.nu +eu.nu +firm.nu +qu.am +co.gp +movie.lc +filme.lc +int.nf +int.ps +jixx.de +jixx.net +guest.de +guests.de +ciy.de +tv.gg +eu.gg +us.gg +x.gg +npx.de +yj.ae +flf.li +2.ag +fk.gs +se.nf +ca.nf +au.nf +be.nf +dk.nf +fi.nf +gr.nf +no.nf +pl.nf +cz.nf +jp.nf +ar.nf +kr.nf +br.nf +cn.nf +nl.gp +es.gp +asia.gp +online.gp +fr.gp +it.gp +search.gp +mp3.gp +b2b.lc +wiki.lc +flirt.lc +18.lc +3d.lc +party.lc +photos.lc +you.lc +space.lc +share.lc +today.lc +de.ki +in.nf +world.mu +now.lc +welt.tl +team.tl +planet.tl +netz.tl +center.tl +server.tl +design.tl +city.tl +berlin.tl +team.cx +server.gg +community.lc +gaming.lc +gilde.in +chat.dj +dr.ag +name.vu +media.je +edu.ms +name.vg +pro.vg +biz.uz +pro.ac +name.ac +seite.asia +eu.cr +uk.cr +ch.kg +nl.kg +gr.kg +pt.kg +lu.kg +hu.kg +online.tc +info.nu +de.cg +de.gd +9gb.de +x9.eu +uk.st +us.st +at.cr +online.cr +shopping.cr +mp3.cr +free.cr +sms.cr +1.vg +top.tc +mail.ht +job.ec +de.ht +seite24.eu +2.ly +pro.ly +internet.ly +us.ly +me.ly +shop.fm + +# + +altervista.org +iespana.es + +sk6.ru +cantv.net +lycos.es +biz.ua +wbs.cz +ic.cz + +# 000webhost.com domains +000webhost.com +net23.net +net84.net +site88.net +web44.net + +ya.com +hopto.org +free-webhosts.com +byteact.com +icr38.net +angelfire.com +mylivepage.ru +valuehost.ru +netfirms.com +siteedit.ru + +# Other Zenon domains + +zmail.ru +id.ru +go.ru + +# Free wap hosting +wen.ru + +# Relcom +msk.su +spb.su +abkhazia.su +adygeya.su +arkhangelsk.su +armenia.su +ashgabad.su +azerbaijan.su +balashov.su +bashkiria.su +belgorod.su +bryansk.su +bukhara.su +chimkent.su +dagestan.su +east-kazakhstan.su +exnet.su +georgia.su +grozny.su +ivanovo.su +jambyl.su +kalmykia.su +kaluga.su +karacol.su +karaganda.su +karelia.su +khakassia.su +komi.su +krasnodar.su +kurgan.su +kustanai.su +lenug.su +mangyshlak.su +mordovia.su +murmansk.su +nalchik.su +navoi.su +north-kazakhstan.su +nov.su +obninsk.su +penza.su +pokrovsk.su 
+sochi.su +tashkent.su +termez.su +togliatti.su +troitsk.su +tselinograd.su +tula.su +tuva.su +vladikavkaz.su +vladimir.su +vologda.su +yakutia.su + +europtrade.ru +890m.com +site40.net +site50.net +freezoka.com +blogspot.com +pvm62.com +virtuale.org +access.to +topf.ru +pisem.su +ok.ru +100mb.com +co.cc + +# other pocha.ru domains +krovatka.su + +# ucoz.ru +3dn.ru +at.ua +clan.su +do.am +moy.su +my1.ru +p0.ru +ucoz.com +ucoz.de +ucoz.es +ucoz.hu +ucoz.kz +ucoz.lv +ucoz.net +ucoz.org +ucoz.ua +vo.uz + +# Create a ShortURL +1sta.com +vze.com + +# free hosters from: +# http://svn.park.rambler.ru/svn/trunk/Spider/conf/freehosters.exclude + +spb.ru +rbcmail.ru +hut.ru +narod.ru +narod2.ru +chat.ru +by.ru +far.ru +h1.ru +boom.ru +nm.ru +fbi.ru +4all.ru +bizmail.ru +dnevnik.ru +ecard.ru +ganja.ru +goa.ru +hash.ru +mpeg3.ru +nrg.ru +phreak.ru +plazma.ru +vi-rus.ru +xak.ru +newmail.ru +z-photo.ru +agava.ru +dem.ru +webservis.ru +ur.ru +bos.ru +vov.ru +r2.ru +bip.ru +wallst.ru +hotmail.ru +hotbox.ru +euro.ru +dax.ru +sitecity.ru +al.ru +dtn.ru +hop.ru +lgg.ru +ru.ru +pochtamt.ru +stsland.ru +promural.ru +ufanet.ru +kharkov.ua +pisem.net +fromru.com +ukrbiz.net +aiq.ru +fatal.ru +hoha.ru +h11.ru +h5.ru +pochta.ru +mail333.com +krovatka.net +orc.ru +h12.ru +front.ru +land.ru +com1.ru +h14.ru +8m.com +sbn.bz +xost.ru +nxt.ru +proxycheker.ru +h15.ru +hut1.ru +webhost.ru +polubomu.ru +beplaced.ru +h16.ru +yard.ru +smtp.ru +pop3.ru +hut2.ru +subs.ru +pud.ru +hostonfly.ru +jino-net.ru +vio.ru +vip.su +i-nets.ru +sitehome.ru +freezona.ru +pixi.ru +ucoz.ru +russian.ru +mail2k.ru +nextmail.ru +programist.ru +dezigner.ru +email.su +xaker.ru +rubas.ru +e2e.ru +page.by +x5x.ru +h17.ru +nightmail.ru +h10.ru +tu1.ru +fxf.ru +inlocal.ru +hostq.ru +greenline.ru +21r.ru +0805.ru +aget.ru +freepage.ru +hoter.ru +rx.ru +wmsite.ru +fxcity.ru +ffx.ru +clubfx.ru +h18.ru +vipshop.ru +vipcentr.ru +vdelo.ru +viptop.ru +tora.ru +fromru.su +iplot.ru +pips.ru +web-box.ru +okis.ru +hocomua.ru +vdsite.ru 
+enjjoy.ru +metastock.ru +onlymail.ru +tut.su +ifolder.ru +wapn.ru +hocom.by +yadviga.ru +x53.ru +ilovethis.ru +codingclub.ru +onepage.ru +onep.ru +kzet.ru +xam.su +epage.ru +student.su +hu2.ru +logmail.ru +students.ru +h2m.ru +ho.ua +mybyte.ru +supercharts.ru +forum24.ru +quake.ru + +# free hosters from: +# http://svn.park.rambler.ru/svn/trunk/Spider/conf/foreign_freehosters.exclude + +hotusa.org +99inch.com +yoll.net +my-age.net +enacre.net +maclenet.com +bebto.com +undonet.com +lydo.org +imess.net +pochta.org +mail15.com +fbhosting.com +freeservers.com +4t.com +iwarp.com +faithweb.com +homestead.com +itgo.com +tvheaven.com +netfreehost.com +mindnmagick.com +frageon.net +dnip.net +htmlplanet.com +scriptmania.com +100mbfreesite.com +sfkteam.com +glooby.net +eclub.lv +badland.com +zomi.net +power-emergency.com +at.lv +uadom.net +ho11.com +0golf.com +netfast.org +topcities.com +100free.com +1hwy.com +lbgo.com +izypage.net +250free.com +741.com +noneto.com +bestyour.org +501megs.com +atspace.com +b4site.com +gobot.com +100freemb.com +ueuo.com +atspace.org +8m.net +freehosting.net +greatnow.com +1sweethost.com +zzn.com +i8.com +servik.com +s5.com +freehostia.com +sitesfree.com +150m.com +zoomshare.com +freehostspace.com +hostingisus.com +3000mb.com +0catch.com +xphost.org +onecoolhost.com +php0h.net +0pi.com +cheeb.com +20m.com +00server.com +sitesled.com +00trek.com +freewebspace.com +4mg.com +awardspace.com +00author.com +prohosting.com +50webs.com +275mb.com +20to.com +6te.net +freewebpage.org +indiegroup.com +memebot.com +5gbfree.com +ourprofile.us +110mb.com +kwikphp.com +bappy.com +sphosting.com +tekcities.com +alfaspace.net +vettepics.com +atspace.us +t35.com +oceansfree.com +30mb.com +urllogs.com +freehomepage.com +ifastnet.com +hostervice.info +swiftphp.com +atspace.name +php0h.com +freehostpro.com +freehostonline.com +awardspace.biz +tecbox.com +deep-ice.com +myweb.io +1111mb.com +freerhost.com +0moola.com +50megs.com +fws1.com +4sql.net +1majorhost.com 
+gigcities.com +0505mb.com +freebitty.com +prohosts.org +gigsweb.com +101freehost.com +reghosting.com +sprinterweb.net +0000host.com +2u-2.com +tripod.com +nofeehost.com +web.com +kogaryu.com +itrello.com +bravehost.com +sinfree.net +20ii.com +on-4.com +quotaless.com +isgreat.org +worldbreak.com +20is.com +00it.com +fx-club.org +9cy.com +psend.com +atspace.biz +totalh.com +orgfree.com +php1h.com +1asphost.com +789mb.com +yeahost.com +3-hosting.net +byethost.com +wtcsites.com +myfreewebhost.org +20fr.com +jvl.com +brinkster.net +freehosting300.com +usafreespace.com +freewebhostingpro.com +justfree.com +sppages.com +70mb.ru +joolo.com +free-site-host.com +eqo.de +fora.pl +envy.nu +247ihost.com +00bp.com +iifree.net +fr33webhost.com +invbridge.com +servetown.com +fcpages.com +dex1.com +007webpro.com +22web.net +125mb.com +12gbfree.com +freeweb7.com +hophost.net +lookingat.us +0buckhost.com +batcave.net +forever.kz +web1000.com +hit.bg +fatfreehost.com +quickfreehosting.com +zxq.net +zzl.org +zymichost.com +webng.com +nutzworld.net +freehostplace.com +hostwq.net +dreamstation.com +012webpages.com +host-page.com +mokoginta.net +cileni.com +ownspace.org +movillink.com +mundesweb.com +vhost4free.com +700up.com +siteburg.com +9skul.com +datadiri.com +freehostpage.com +hostinggratisvenezuela.com +free-web-hosting.biz +terapad.com +10001mb.com +blogindo.net +o-f.com +topfreewebhosting.com +freeadsensehost.com diff --git a/conf/drugs.inc b/conf/drugs.inc new file mode 100644 index 000000000..444bc8596 --- /dev/null +++ b/conf/drugs.inc @@ -0,0 +1,68 @@ +# Rspamd variables for drugs emails + +$__DRUGS_DIET1="/(?:\b|\s)[_\W]{0,3}p[_\W]{0,3}h[_\W]{0,3}[e3\xE8-\xEB][_\W]{0,3}n[_\W]{0,3}t[_\W]{0,3}[e3\xE8-\xEB][_\W]{0,3}r[_\W]{0,3}m[_\W]{0,3}[i1!|l\xEC-\xEF][_\W]{0,3}n[_\W]{0,3}[e3\xE8-\xEB][_\W]{0,3}(?:\b|\s)/irP"; +$__DRUGS_DIET2="/(?:\b|\s)_{0,3}[i1!|l\xEC-\xEF][_\W]?o[_\W]?n[_\W]?[a4\xE0-\xE6@][_\W]?m[_\W]?[i1!|l\xEC-\xEF][_\W]?n_{0,3}\b/irP"; 
+$__DRUGS_DIET3="/\bbontril\b/irP"; +$__DRUGS_DIET4="/\bphendimetrazine\b/irP"; +$__DRUGS_DIET5="/\bdiethylpropion\b/irP"; +$__DRUGS_DIET6="/(?:\b|\s)[_\W]{0,3}M[_\W]{0,3}[e3\xE8-\xEB][_\W]{0,3}r[_\W]{0,3}[i1!|l\xEC-\xEF][_\W]{0,3}d[_\W]{0,3}[i1!|l\xEC-\xEF][_\W]{0,3}[a4\xE0-\xE6@][_\W]{0,3}(?:\b|\s)/irP"; +$__DRUGS_DIET7="/\b_{0,3}t[_\W]?[e3\xE8-\xEB][_\W]?n[_\W]?u[_\W]?a[_\W]?t[_\W]?[e3\xE8-\xEB]_{0,3}(?:\b|\s)/irP"; +$__DRUGS_DIET8="/\b_{0,3}d[_\W]?[i1!|l\xEC-\xEF][_\W]?d[_\W]?r[_\W][e3\xE8-\xEB[_\W]?xx?_{0,3}\b/irP"; +$__DRUGS_DIET9="/\b_{0,3}a[_\W]?d[_\W]?[i1!|l\xEC-\xEF][_\W]?p[_\W]?[e3\xE8-\xEB][_\W]?x_{0,3}\b/irP"; +$__DRUGS_DIET10="/\b_{0,3}x?x[_\W]?[e3\xE8-\xEB][_\W]?n[_\W]?[i1!|l\xEC-\xEF][_\W]?c[_\W]?[a4\xE0-\xE6@][_\W]?l_{0,3}\b/irP"; +$DRUGS_DIET="(${R_UNDISC_RCPT} | ${R_BAD_CTE_7BIT} | ${R_NO_SPACE_IN_FROM}) & (${__DRUGS_DIET1} | ${__DRUGS_DIET2} | ${__DRUGS_DIET3} | ${__DRUGS_DIET4} | ${__DRUGS_DIET5} | ${__DRUGS_DIET6} | ${__DRUGS_DIET7} | ${__DRUGS_DIET8} | ${__DRUGS_DIET9} | ${__DRUGS_DIET10})"; + +$__DRUGS_ERECTILE1="/(?:\b|\s)[_\W]{0,3}(?:\\\/|V)[_\W]{0,3}[ij1!|l\xEC\xED\xEE\xEF][_\W]{0,3}[a40\xE0-\xE6@][_\W]{0,3}[xyz]?[gj][_\W]{0,3}r[_\W]{0,3}[a40\xE0-\xE6@][_\W]{0,3}x?[_\W]{0,3}(?:\b|\s)/irP"; +$__DRUGS_ERECTILE2="/\bV(?:agira|igara|iaggra|iaegra)\b/irP"; +$__DRUGS_ERECTILE3="/(?:\A|[\s\x00-\x2f\x3a-\x40\x5b-\x60\x7b-\x7f])[_\W]{0,3}C[_\W]{0,3}[ij1!|l\xEC\xED\xEE\xEF][_\W]{0,3}[a4\xE0-\xE6@][_\W]{0,3}l?[l!|1][_\W]{0,3}[i1!|l\xEC-\xEF][_\W]{0,3}s[_\W]{0,3}(?:\b|\s)/irP"; +$__DRUGS_ERECTILE4="/\bC(?:alis|ilias|ilais)\b/irP"; +$__DRUGS_ERECTILE5="/\b_{0,3}s[_\W]?[i1!|l\xEC-\xEF][_\W]?l[_\W]?d[_\W]?[e3\xE8-\xEB][_\W]?n[_\W]?[a4\xE0-\xE6@][_\W]?f[_\W]?[i1!|l\xEC-\xEF][_\W]?l c[_\W]?[i1!|l\xEC-\xEF][_\W]?t[_\W]?r[_\W]?[a4\xE0-\xE6@][_\W]?t[_\W]?[e3\xE8-\xEB]_{0,3}(?:\b|\s)/irP"; +$__DRUGS_ERECTILE6="/\b_{0,3}L[_\W]?[e3\xE8-\xEB][_\W]?(?:\\\/|V)[_\W]?[i1!|l\xEC-\xEF][_\W]?t[_\W]?r[_\W]?[a4\xE0-\xE6@][_\W]?(?:\b|\s)/irP"; 
+$__DRUGS_ERECTILE8="/\b_{0,3}T[_\W]?[a4\xE0-\xE6@][_\W]?d[_\W]?[a4\xE0-\xE6@][_\W]?l[_\W]?[a4\xE0-\xE6@][_\W]?f[_\W]?[i1!|l\xEC-\xEF][_\W]?l_{0,3}\b/irP"; +$__DRUGS_ERECTILE10="/\b_{0,3}V[_\W]?(?:i|\ï\;)[_\W]?(?:a|\à|\å)\;?[_\W]?g[_\W]?r[_\W]?(?:a|\à|\å)\b/irP"; +$__DRUGS_ERECTILE11="/(?:\b|\s)_{0,3}[a4\xE0-\xE6@][_\W]{0,3}p[_\W]{0,3}c[_\W]{0,3}[a4\xE0-\xE6@][_\W]{0,3}[l!|1][_\W]{0,3}[i1!|l\xEC-\xEF][_\W]{0,3}s_{0,3}\b/irP"; +$DRUGS_ERECTILE="(${R_UNDISC_RCPT} | ${R_BAD_CTE_7BIT} | ${R_NO_SPACE_IN_FROM}) & (${__DRUGS_ERECTILE1} | ${__DRUGS_ERECTILE2} | ${__DRUGS_ERECTILE3} | ${__DRUGS_ERECTILE4} | ${__DRUGS_ERECTILE5} | ${__DRUGS_ERECTILE6} | ${__DRUGS_ERECTILE8} | ${__DRUGS_ERECTILE10} | ${__DRUGS_ERECTILE11})"; + +$__DRUGS_ANXIETY1="/(?:\b|\s)[_\W]{0,3}x?x[_\W]{0,3}[a4\xE0-\xE6@][_\W]{0,3}n[_\W]{0,3}[ea4\xE1\xE2\xE3@][_\W]{0,3}xx?_{0,3}\b/irP"; +$__DRUGS_ANXIETY2="/\bAlprazolam\b/irP"; +$__DRUGS_ANXIETY3="/(?:\b|\s)[_\W]{0,3}(?:\\\/|V)[_\W]{0,3}[a4\xE0-\xE6@][_\W]{0,3}[l|][_\W]{0,3}[i1!|l\xEC-\xEF][_\W]{0,3}[u\xB5\xF9-\xFC][_\W]{0,3}m\b/irP"; +$__DRUGS_ANXIETY4="/\b_{0,3}D[_\W]?[i1!|l\xEC-\xEF][_\W]?[a4\xE0-\xE6@][_\W]?z[_\W]?[ea3\xE9\xEA\xEB][_\W]?p[_\W]?[a4\xE0-\xE6@][_\W]?m_{0,3}\b/irP"; +$__DRUGS_ANXIETY5="/(?:\b|\s)[a4\xE0-\xE6@][_\W]?t[_\W]?[i1!|l\xEC-\xEF][_\W]?v[_\W]?[a4\xE0-\xE6@][_\W]?n_{0,3}\b/irP"; +$__DRUGS_ANXIETY6="/\b_{0,3}l[_\W]?[o0\xF2-\xF6][_\W]?r[_\W]?[a4\xE0-\xE6@][_\W]?z[_\W]?[e3\xE8-\xEB][_\W]?p[_\W]?[a4\xE0-\xE6@][_\W]?m_{0,3}\b/irP"; +$__DRUGS_ANXIETY7="/\b_{0,3}c[_\W]?l[_\W]?[o0\xF2-\xF6][_\W]?n[_\W]?[a4\xE0-\xE6@][_\W]?z[_\W]?e[_\W]?p[_\W]?[a4\xE0-\xE6@][_\W]?m\b/irP"; +$__DRUGS_ANXIETY8="/\bklonopin\b/irP"; +$__DRUGS_ANXIETY9="/\brivotril\b/irP"; +$DRUGS_ANXIETY="(${R_UNDISC_RCPT} | ${R_BAD_CTE_7BIT} | ${R_NO_SPACE_IN_FROM}) & (${__DRUGS_ANXIETY1} | ${__DRUGS_ANXIETY2} | ${__DRUGS_ANXIETY3} | ${__DRUGS_ANXIETY4} | ${__DRUGS_ANXIETY5} | ${__DRUGS_ANXIETY6} | ${__DRUGS_ANXIETY7} | ${__DRUGS_ANXIETY8} | ${__DRUGS_ANXIETY9})"; 
+$DRUGS_ANXIETY_EREC="${DRUGS_ERECTILE} & ${DRUGS_ANXIETY}"; + +$__DRUGS_PAIN1="/\b_{0,3}h[_\W]?y[_\W]?d[_\W]?r[_\W]?[o0\xF2-\xF6][_\W]?c[_\W]?[o0\xF2-\xF6][_\W]?d[_\W]?[o0\xF2-\xF6][_\W]?n[_\W]?e_{0,3}\b/irP"; +$__DRUGS_PAIN2="/\b_{0,3}c[o0\xF2-\xF6]deine_{0,3}\b/irP"; +$__DRUGS_PAIN3="/(?:\b|\s)[_\W]{0,3}[u\xB5\xF9-\xFC][_\W]{0,3}l[_\W]{0,3}t[_\W]{0,3}r[_\W]{0,3}[a4\xE0-\xE6@][_\W]{0,3}m_{0,3}\b/irP"; +$__DRUGS_PAIN4="/(?:\b|\s)[_\W]{0,3}(?:\\\/|V)[_\W]{0,3}[i1!|l\xEC-\xEF][_\W]{0,3}c[_\W]{0,3}[o0\xF2-\xF6][_\W]{0,3}d[_\W]{0,3}[i1!|l\xEC-\xEF][_\W]{0,3}ns?[_\W]{0,3}(?:\b|\s)/irP"; +$__DRUGS_PAIN5="/\b_{0,3}t[_\W]?r[_\W]?[a4\xE0-\xE6@][_\W]?m[_\W]?[a4\xE0-\xE6@][_\W]?d[_\W]?[o0\xF2-\xF6][_\W]?[l!|1]_{0,3}\b/irP"; +$__DRUGS_PAIN6="/\b_{0,3}u[_\W]?l[_\W]?t[_\W]?r[_\W]?a[_\W]?c[_\W]?e[_\W]?t_{0,3}\b/irP"; +$__DRUGS_PAIN7="/\b_{0,3}f[_\W]?[i1!|l\xEC-\xEF][_\W]?[o0\xF2-\xF6][_\W]?r[_\W]?[i1!|l\xEC-\xEF][_\W]?c[_\W]?[e3\xE8-\xEB][_\W]?[t7]_{0,3}\b/irP"; +$__DRUGS_PAIN8="/\b_{0,3}c[_\W]?[e3\xE8-\xEB][_\W]?l[_\W]?[e3\xE8-\xEB][_\W]?b[_\W]?r[_\W]?[e3\xE8-\xEB][_\W]?x_{0,3}\b/irP"; +$__DRUGS_PAIN9="/(?:\b|\s)_{0,3}[i1!|l\xEC-\xEF]m[i1!|l\xEC-\xEF]tr[e3\xE8-\xEB]x_{0,3}\b/irP"; +$__DRUGS_PAIN10="/(?:\b|\s)[_\W]{0,3}(?:\\\/|V)[_\W]{0,3}[i1!|l\xEC-\xEF][_\W]{0,3}[o0\xF2-\xF6][_\W]{0,3}x[_\W]{0,3}xx?_{0,3}\b/irP"; +$__DRUGS_PAIN11="/\bzebutal\b/irP"; +$__DRUGS_PAIN12="/\besgic plus\b/irP"; +$__DRUGS_PAIN13="/\bD[_\W]?[a4\xE0-\xE6@][_\W]?r[_\W]?v[_\W]?[o0\xF2-\xF6][_\W]?n\b/irP"; +$__DRUGS_PAIN14="/N[o0\xF2-\xF6]rc[o0\xF2-\xF6]/irP"; +$__DRUGS_PAIN="(${R_UNDISC_RCPT} | ${R_BAD_CTE_7BIT} | ${R_NO_SPACE_IN_FROM}) & (${__DRUGS_PAIN1} | ${__DRUGS_PAIN2} | ${__DRUGS_PAIN3} | ${__DRUGS_PAIN4} | ${__DRUGS_PAIN5} | ${__DRUGS_PAIN6} | ${__DRUGS_PAIN7} | ${__DRUGS_PAIN8} | ${__DRUGS_PAIN9} | ${__DRUGS_PAIN10} | ${__DRUGS_PAIN11} | ${__DRUGS_PAIN12} || ${__DRUGS_PAIN13} | ${__DRUGS_PAIN14})"; + 
+$__DRUGS_SLEEP1="/(?:\b|\s)[_\W]{0,3}[a4\xE0-\xE6@][_\W]{0,3}m[_\W]{0,3}b[_\W]{0,3}[i1!|l\xEC-\xEF][_\W]{0,3}[e3\xE8-\xEB][_\W]{0,3}n[_\W]{0,3}(?:\b|\s)/irP"; +$__DRUGS_SLEEP2="/(?:\b|\s)[_\W]{0,3}S[_\W]{0,3}[o0\xF2-\xF6][_\W]{0,3}n[_\W]{0,3}[a4\xE0-\xE6@][_\W]{0,3}t[_\W]{0,3}[a4\xE0-\xE6@][_\W]{0,3}(?:\b|\s)/irP"; +$__DRUGS_SLEEP3="/\b_{0,3}R[_\W]?[e3\xE8-\xEB][_\W]?s[_\W]?t[_\W]?[o0\xF2-\xF6][_\W]?r[_\W]?i[_\W]?l_{0,3}\b/irP"; +$__DRUGS_SLEEP4="/\b_{0,3}H[_\W]?[a4\xE0-\xE6@][_\W]?l[_\W]?c[_\W]?i[_\W]?[o0\xF2-\xF6][_\W]?n_{0,3}\b/irP"; +$__DRUGS_SLEEP="${__DRUGS_SLEEP1} | ${__DRUGS_SLEEP2} | ${__DRUGS_SLEEP3} | ${__DRUGS_SLEEP4}"; + +$__DRUGS_MUSCLE1="/(?:\b|\s)[_\W]{0,3}s[_\W]{0,3}[o0\xF2-\xF6][_\W]{0,3}m[_\W]{0,3}[a4\xE0-\xE6@][_\W]{0,3}(?:\b|\s)/irP"; +$__DRUGS_MUSCLE2="/\b_{0,3}cycl[o0\xF2-\xF6]b[e3\xE8-\xEB]nz[a4\xE0-\xE6@]pr[i1!|l\xEC-\xEF]n[e3\xE8-\xEB]_{0,3}(?:\b|\s)/irP"; +$__DRUGS_MUSCLE3="/\b_{0,3}f[_\W]?l[_\W]?[e3\xE8-\xEB][_\W]?x[_\W]?[e3\xE8-\xEB][_\W]?r[_\W]?[i1!|l\xEC-\xEF]_{0,3}[_\W]?l_{0,3}\b/irP"; +$__DRUGS_MUSCLE4="/\b_{0,3}z[_\W]?a[_\W]?n[_\W]?a[_\W]?f[_\W]?l[_\W]?e[_\W]?x_{0,3}\b/irP"; +$__DRUGS_MUSCLE5="/\bskelaxin\b/irP"; +$DRUGS_MUSCLE="(${R_UNDISC_RCPT} | ${R_BAD_CTE_7BIT} | ${R_NO_SPACE_IN_FROM}) & (${__DRUGS_MUSCLE1} | ${__DRUGS_MUSCLE2} | ${__DRUGS_MUSCLE3} | ${__DRUGS_MUSCLE4} | ${__DRUGS_MUSCLE5})"; + +$DRUGS_MANYKINDS="(${R_UNDISC_RCPT} | ${R_BAD_CTE_7BIT} | ${R_NO_SPACE_IN_FROM}) & regexp_match_number(3, ${DRUGS_ERECTILE}, ${DRUGS_DIET}, ${__DRUGS_PAIN}, ${__DRUGS_SLEEP}, ${DRUGS_MUSCLE}, ${DRUGS_ANXIETY})"; + diff --git a/conf/fraud.inc b/conf/fraud.inc new file mode 100644 index 000000000..2c52471a1 --- /dev/null +++ b/conf/fraud.inc @@ -0,0 +1,56 @@ +# Advance fee spam + +$__FRAUD_DBI="/(?:\bdollars?\b|\busd(?:ollars)?(?:[0-9]|\b)|\bus\$|\$[0-9,.]{6,}|\$[0-9].{0,8}[mb]illion|\$[0-9.,]{2,10} ?m|\beuros?\b|u[.]?s[.]? 
[0-9.]+ m)/irP"; +$__FRAUD_KJV="/(?:claim|concerning) (?:the|this) money/irP"; +$__FRAUD_IRJ="/(?:finance|holding|securit(?:ies|y)) (?:company|firm|storage house)/irP"; +$__FRAUD_NEB="/(?:government|bank) of nigeria/irP"; +$__FRAUD_XJR="/(?:who was a|as a|an? honest|you being a|to any) foreigner/irP"; +$__FRAUD_DPR="/\b(?:(?:respond|reply) (?:urgently|immediately)|(?:urgent|immediate|earliest) (?:reply|response))\b/irP"; +$__FRAUD_PTS="/\b(?:ass?ass?inat(?:ed|ion)|murder(?:e?d)?|kill(?:ed|ing)\b[^.]{0,99}\b(?:war veterans|rebels?))\b/irP"; +$__FRAUD_BEP="/\b(?:bank of nigeria|central bank of|trust bank|apex bank|amalgamated bank)\b/irP"; +$__FRAUD_TDP="/\b(?:business partner(?:s|ship)?|silent partner(?:s|ship)?)\b/irP"; +$__FRAUD_GAN="/\b(?:charles taylor|serena|abacha|gu[eéè]i|sese[- ]?seko|kabila)\b/irP"; +$__FRAUD_IRT="/\b(?:compliments? of the|dear friend|dear sir|yours faithfully|season'?s greetings)\b/irP"; +$__FRAUD_AON="/\b(?:confidential|private|alternate|alternative) (?:(?:e-? *)?mail)\b/irP"; +$__FRAUD_WNY="/\b(?:disburse?(?:ment)?|incurr?(?:ed)?|remunerr?at(?:ed?|ion)|remm?itt?(?:ed|ance|ing)?)\b/irP"; +$__FRAUD_IPK="/\b(?:in|to|visit) your country\b/irP"; +$__FRAUD_QXX="/\b(?:my name is|i am) (?:mrs?|engr|barrister|dr|prince(?:ss)?)[. ]/irP"; +$__FRAUD_IOU="/\b(?:no risks?|risk-? 
*free|free of risks?|100% safe)\b/irP"; +$__FRAUD_EZY="/\b(?:of|the) late president\b/irP"; +$__FRAUD_MLY="/\b(?:reply|respond)\b[^.]{0,50}\b(?:to|through)\b[^.]{0,50}\@\b/irP"; +$__FRAUD_ZFJ="/\b(?:wife|son|brother|daughter) of the late\b/irP"; +$__FRAUD_KDT="/\bU\.?S\.?(?:D\.?)?\s*(?:\$\s*)?(?:\d+,\d+,\d+|\d+\.\d+\.\d+|\d+(?:\.\d+)?\s*milli?on)/irP"; +$__FRAUD_ULK="/\baffidavits?\b/irP"; +$__FRAUD_BGP="/\battached to ticket number\b/irP"; +$__FRAUD_FBI="/\bdisburs/irP"; +$__FRAUD_JBU="/\bforeign account\b/irP"; +$__FRAUD_YWW="/\bfurnish you with\b/irP"; +$__FRAUD_JYG="/\bgive\s+you .{0,15}(?:fund|money|total|sum|contact|percent)\b/irP"; +$__FRAUD_XVW="/\bhonest cooperation\b/irP"; +$__FRAUD_UUY="/\blegitimate business(?:es)?\b/irP"; +$__FRAUD_SNT="/\blocate(?: .{1,20})? extended relative/irP"; +$__FRAUD_LTX="/\bmilli?on (?:.{1,25} thousand\s*)?(?:(?:united states|u\.?s\.?) dollars|(?i:U\.?S\.?D?))\b/irP"; +$__FRAUD_JNB="/\boperat(?:e|ing)\b[^.]{0,99}\b(?:for(?:ei|ie)gn|off-? ?shore|over-? ?seas?) (?:bank )?accounts?\b/irP"; +$__FRAUD_QFY="/\bover-? 
*(?:invoiced?|cost(?:s|ing)?)\b/irP"; +$__FRAUD_WDR="/\bprivate lawyer\b/irP"; +$__FRAUD_WFC="/\bsecur(?:e|ing) (?:the )?(?:funds?|monies)\b/irP"; +$__FRAUD_AUM="/\bthe desk of\b/irP"; +$__FRAUD_MCQ="/\btransaction\b.{1,30}\b(?:magnitude|diplomatic|strict|absolute|secret|confiden(?:tial|ce)|guarantee)/irP"; +$__FRAUD_ETX="/\byour\b[^.]{0,99}\b(?:contact (?:details|information)|private (?:e?[- ]?mail|telephone|tel|phone|fax))\b/irP"; +$__FRAUD_PVN="/as the beneficiary/irP"; +$__FRAUD_FVU="/award notification/irP"; +$__FRAUD_CKF="/computer ballot system/irP"; +$__FRAUD_FCW="/fiduciary agent/irP"; +$__FRAUD_MQO="/foreign (?:business partner|customer)/irP"; +$__FRAUD_TCC="/foreign (?:offshore )?(?:bank|account)/irP"; +$__FRAUD_GBW="/god gives .{1,10}second chance/irP"; +$__FRAUD_NRG="/i am contacting you/irP"; +$__FRAUD_RLX="/lott(?:o|ery) (?:co,?ordinator|international)/irP"; +$__FRAUD_AXF="/magnanimity/irP"; +$__FRAUD_THJ="/modalit(?:y|ies)/irP"; +$__FRAUD_YQV="/nigerian? (?:national|government)/irP"; +$__FRAUD_YJA="/over-invoice/irP"; +$__FRAUD_YPO="/the total sum/irP"; +$__FRAUD_UOQ="/vital documents/irP"; +$ADVANCE_FEE_2="(${R_UNDISC_RCPT} | ${R_BAD_CTE_7BIT} | ${R_NO_SPACE_IN_FROM}) & regexp_match_number(2, ${__FRAUD_KJV}, ${__FRAUD_IRJ}, ${__FRAUD_NEB}, ${__FRAUD_XJR}, ${__FRAUD_EZY}, ${__FRAUD_ZFJ}, ${__FRAUD_KDT}, ${__FRAUD_BGP}, ${__FRAUD_FBI}, ${__FRAUD_JBU}, ${__FRAUD_JYG}, ${__FRAUD_XVW}, ${__FRAUD_SNT}, ${__FRAUD_LTX}, ${__FRAUD_MCQ}, ${__FRAUD_PVN}, ${__FRAUD_FVU}, ${__FRAUD_CKF}, ${__FRAUD_FCW}, ${__FRAUD_MQO}, ${__FRAUD_TCC}, ${__FRAUD_GBW}, ${__FRAUD_NRG}, ${__FRAUD_RLX}, ${__FRAUD_AXF}, ${__FRAUD_THJ}, ${__FRAUD_YQV}, ${__FRAUD_YJA}, ${__FRAUD_YPO}, ${__FRAUD_UOQ}, ${__FRAUD_DBI}, ${__FRAUD_BEP}, ${__FRAUD_DPR}, ${__FRAUD_QXX}, ${__FRAUD_QFY}, ${__FRAUD_PTS}, ${__FRAUD_TDP}, ${__FRAUD_GAN}, ${__FRAUD_IPK}, ${__FRAUD_AON}, ${__FRAUD_WNY}, ${__FRAUD_AUM}, ${__FRAUD_WFC}, ${__FRAUD_YWW}, ${__FRAUD_ULK}, ${__FRAUD_IOU}, ${__FRAUD_JNB}, ${__FRAUD_IRT}, 
${__FRAUD_ETX}, ${__FRAUD_WDR}, ${__FRAUD_UUY}, ${__FRAUD_MLY})"; +$ADVANCE_FEE_3="(${R_UNDISC_RCPT} | ${R_BAD_CTE_7BIT} | ${R_NO_SPACE_IN_FROM}) & regexp_match_number(3, ${__FRAUD_KJV}, ${__FRAUD_IRJ}, ${__FRAUD_NEB}, ${__FRAUD_XJR}, ${__FRAUD_EZY}, ${__FRAUD_ZFJ}, ${__FRAUD_KDT}, ${__FRAUD_BGP}, ${__FRAUD_FBI}, ${__FRAUD_JBU}, ${__FRAUD_JYG}, ${__FRAUD_XVW}, ${__FRAUD_SNT}, ${__FRAUD_LTX}, ${__FRAUD_MCQ}, ${__FRAUD_PVN}, ${__FRAUD_FVU}, ${__FRAUD_CKF}, ${__FRAUD_FCW}, ${__FRAUD_MQO}, ${__FRAUD_TCC}, ${__FRAUD_GBW}, ${__FRAUD_NRG}, ${__FRAUD_RLX}, ${__FRAUD_AXF}, ${__FRAUD_THJ}, ${__FRAUD_YQV}, ${__FRAUD_YJA}, ${__FRAUD_YPO}, ${__FRAUD_UOQ}, ${__FRAUD_DBI}, ${__FRAUD_BEP}, ${__FRAUD_DPR}, ${__FRAUD_QXX}, ${__FRAUD_QFY}, ${__FRAUD_PTS}, ${__FRAUD_TDP}, ${__FRAUD_GAN}, ${__FRAUD_IPK}, ${__FRAUD_AON}, ${__FRAUD_WNY}, ${__FRAUD_AUM}, ${__FRAUD_WFC}, ${__FRAUD_YWW}, ${__FRAUD_ULK}, ${__FRAUD_IOU}, ${__FRAUD_JNB}, ${__FRAUD_IRT}, ${__FRAUD_ETX}, ${__FRAUD_WDR}, ${__FRAUD_UUY}, ${__FRAUD_MLY})"; diff --git a/conf/headers.inc b/conf/headers.inc new file mode 100644 index 000000000..29f06b3cb --- /dev/null +++ b/conf/headers.inc @@ -0,0 +1,167 @@ +# Different headers violation + +# Subject need encoding +$__SUBJECT_ENCODED_B64 = "Subject=/=\?\S+\?B\?/iX"; +$__SUBJECT_ENCODED_QP="Subject=/=\?\S+\?Q\?/iX"; +$__SUBJECT_NEEDS_MIME="Subject=/[\x00-\x08\x0b\x0c\x0e-\x1f\x7f-\xff]/X"; +$SUBJECT_NEEDS_ENCODING = "!${__SUBJECT_ENCODED_B64} & !${__SUBJECT_ENCODED_QP} & ${__SUBJECT_NEEDS_MIME}"; +$__HAS_SUBJECT="header_exists(Subject)"; +$__EMPTY_SUBJECT="Subject=/^$/"; +$MISSING_SUBJECT="!${__HAS_SUBJECT} | ${__EMPTY_SUBJECT}"; +$__R_RCVD_POCHTA_RU="Received=/by mail\d\.ks\.pochta\.ru \( sendmail 8\.\d{2}\.\d\/8\.\d{2}\.\d\) with esmtpa id/H"; +$__R_MUA_OUTLOOK="X-Mailer=/^Microsoft Outlook Express/Hr"; +$__R_MUA_THEBAT="X-Mailer=/^The Bat!/H"; +$__R_CTYPE_TEXT="content_type_is_type(text)"; +$__R_CTE_7BIT="compare_transfer_encoding(7bit)"; +$__R_BODY_8BIT="/[^\x01-\x7f]/Mr"; 
+$R_BAD_CTE_7BIT="${__R_CTYPE_TEXT} & ${__R_CTE_7BIT} & ${__R_BODY_8BIT}"; +$R_TLD_TK = "/\.tk$/U"; +$R_POCHTA_RU = "${__R_RCVD_POCHTA_RU} & ${R_TLD_TK} & ${SUBJECT_NEEDS_ENCODING}"; +$R_TMP_SPAMMY_MAILER = "X-Mailer=/^(?:Exim 3\.12|Gentoo|Qmail 2\.67|Sendmail 3\.84\/3\.84|WebPOP 1\.0|mLogic)/H"; +$R_WWW_EKONF_COM = "${__R_MEGA_TABLE} & ${__R_GREEK_SYMBOLS}"; +$R_FREE_HOSTING_NAROD = "/\.narod\.ru/U"; +$R_TINYURL = "/http:\/\/(?:tinyurl\.com|snipr\.com|b23\.ru)\/\w/U"; +$R_FREE_HOSTING = "/\.(?:fromru\.com|front\.ru|hotbox\.ru|hotmail\.ru|krovatka\.su|land\.ru|mail15\.com|mail333\.com|newmail\.ru|nightmail\.ru|nm\.ru|pisem\.net|pochtamt\.ru|pop3\.ru|rbcmail\.ru|smtp\.ru)/U"; + +$__HAS_TO="header_exists(To)"; +$MISSING_TO="!${__HAS_TO}"; +$__UNDISC_RCPT="To=/^<?undisclosed-recipient/Hi"; +$R_UNDISC_RCPT="${MISSING_TO} | ${__UNDISC_RCPT}"; + +$__HAS_MID="header_exists(Message-Id)"; +$MISSING_MID="!${__HAS_MID}"; +$R_RCVD_SPAMBOTS="Received=/^from \[\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\] by [-.\w+]{5,255}; [SMTWF][a-z][a-z], [\s\d]?\d [JFMAJSOND][a-z][a-z] \d{4} \d{2}:\d{2}:\d{2} [-+]\d{4}$/mH"; +$R_TO_SEEMS_AUTO="To=/\"?(?<bt>[-.\w]{1,64})\"?\s<\k<bt>\@/H"; +$R_MISSING_CHARSET="content_type_is_type(text) & !content_type_has_param(charset)"; +$R_SAJDING="Subject=/\bsajding(?:om|a)?\b/iH"; +$__R_MUA_MPOP_WEBMAIL="X-Mailer=/^mPOP Web-Mail \d\.\d{2}$/H"; +$__R_MID_MAILRU="Message-Id=/\@w+\.mail\.ru>$/H"; +$__R_RCVD_FROM_MAILRU="Received=/ by [a-z\.]+\d*\.mail\.ru with /H"; +$__R_X_RCVD_FROM_MAILRU="X-Received=/ by [a-z\.]+\d*\.mail\.ru with /H"; +$R_FORGED_MPOP_WEBMAIL="${__R_MUA_MPOP_WEBMAIL} & !(${__R_RCVD_FROM_MAILRU} | ${__R_X_RCVD_FROM_MAILRU} | ${__R_MID_MAILRU})"; +$__R_BGCOLOR="/BGCOLOR=/iM"; +$__R_FONT_COLOR="/font color=[\"']?\#FFFFFF[\"']?/iM"; +$R_WHITE_ON_WHITE="(!${__R_BGCOLOR} & ${__R_FONT_COLOR})"; +$R_NO_SPACE_IN_FROM="From=/\S<[-\w\.]+\@[-\w\.]+>/X"; +$R_FLASH_REDIR_IMGSHACK="/^(?:http:\/\/)?img\d{1,5}\.imageshack\.us\/\S+\.swf/U"; 
+$__R_RCVD_FROM_VALUEHOST="Received=/\sb0\.valuehost\.ru/H"; +$__R_CYR_PHONE="/8 \(\xD799\)/P"; + +$R_SPAM_FROM_VALUEHOST="${__R_RCVD_FROM_VALUEHOST} & ${__R_CYR_PHONE}"; +$__HAS_USER_AGENT="header_exists(User-Agent)"; +$__HAS_X_MAILER="header_exists(X-Mailer)"; + +$__R_RCVD_FROM_MTU="Received=/smtp\d*\.mtu\.ru/H"; +$__R_MID_MTU="Message-Id=/\@smtp\d*\.mtu\.ru>$/H"; + +$__R_RCVD_FROM_ONO="Received=/smtp\d*\.ono\.com/H"; +$__R_MID_ONO="Message-Id=/\@ono\.com>$/H"; + +$__R_RCVD_FROM_VERSATEL="Received=/mail\d*do\.versatel\.de/H"; +$__R_MID_VERSATEL="Message-Id=/\@versanet\.de>$/H"; + +$__R_RCVD_FROM_LIBERO="Received=/cp-out\d+\.libero\.it/H"; +$__R_MID_LIBERO="Message-Id=/[\da-f]{12}\.[\da-f]{16}@/H"; + +$R_SPAM_FROM_MTU="!(${__HAS_X_MAILER} | ${__HAS_USER_AGENT}) & ${__R_RCVD_FROM_MTU} & ${__R_MID_MTU}"; +$R_SPAM_FROM_ONO="!(${__HAS_X_MAILER} | ${__HAS_USER_AGENT}) & ${__R_RCVD_FROM_ONO} & ${__R_MID_ONO}"; +$R_SPAM_FROM_VERSATEL="!(${__HAS_X_MAILER} | ${__HAS_USER_AGENT}) & ${__R_RCVD_FROM_VERSATEL} & ${__R_MID_VERSATEL}"; +$R_SPAM_FROM_LIBERO="!(${__HAS_X_MAILER} | ${__HAS_USER_AGENT}) & ${__R_RCVD_FROM_LIBERO} & ${__R_MID_LIBERO}"; +#$R_FAKE_OUTLOOK="${__R_MUA_OUTLOOK}"; +# $R_FAKE_OUTLOOK="${__R_MUA_OUTLOOK} & (${SUBJECT_NEEDS_ENCODING} | ${R_BAD_CTE_7BIT})"; +$R_FAKE_OUTLOOK="${__R_MUA_OUTLOOK} & ${R_BAD_CTE_7BIT}"; +$R_FAKE_THEBAT="${__R_MUA_THEBAT} & ${SUBJECT_NEEDS_ENCODING}"; + +$__YAHOO_BULK="Received=/from \[\S+\] by \S+\.(?:groups|scd|dcn)\.yahoo\.com with NNFMP/H"; +$__ANY_OUTLOOK_MUA="X-Mailer=/^Microsoft Outlook\b/H"; +$MIME_HTML_ONLY="has_only_html_part()"; +$FORGED_OUTLOOK_HTML="!${__YAHOO_BULK} & ${__ANY_OUTLOOK_MUA} & ${MIME_HTML_ONLY}"; +$SUSPICIOUS_RECIPS="compare_recipients_distance(0.65)"; +$SORTED_RECIPS="is_recipients_sorted()"; +$TRACKER_ID="/^[a-z0-9]{6,24}[-_a-z0-9]{2,36}[a-z0-9]{6,24}\s*\z/isPr"; +$__FROM_ENCODED_B64="From=/\=\?\S+\?B\?/iX"; +$__FROM_NEEDS_MIME="From=/[\x00-\x08\x0b\x0c\x0e-\x1f\x7f-\xff]/H"; 
+$FROM_EXCESS_BASE64="${__FROM_ENCODED_B64} & !${__FROM_NEEDS_MIME}"; + +$__OE_MUA="X-Mailer=/\bOutlook Express [456]\./H"; +$__OE_MSGID_1="Message-Id=/^[A-Za-z0-9-]{7}[A-Za-z0-9]{20}\@hotmail\.com$/mH"; +$__OE_MSGID_2="Message-Id=/^(?:[0-9a-f]{8}|[0-9a-f]{12})\$[0-9a-f]{8}\$[0-9a-f]{8}\@\S+$/mH"; +$__LYRIS_EZLM_REMAILER="List-Unsubscribe=/<mailto:(?:leave-\S+|\S+-unsubscribe)\@\S+>$/H"; +#$__GATED_THROUGH_RCVD_REMOVER="gated_through_received_hdr_remover()"; +$__WACKY_SENDMAIL_VERSION="Received=/\/CWT\/DCE\)/H"; +$__IPLANET_MESSAGING_SERVER="Received=/iPlanet Messaging Server/H"; +$__HOTMAIL_BAYDAV_MSGID="Message-Id=/^BAY\d+-DAV\d+[A-Z0-9]{25}\@phx\.gbl$/mH"; +$__SYMPATICO_MSGID="Message-Id=/^BAYC\d+-PASMTP\d+[A-Z0-9]{25}\@CEZ\.ICE$/mH"; +# $__UNUSABLE_MSGID="${__LYRIS_EZLM_REMAILER} | ${__GATED_THROUGH_RCVD_REMOVER} | ${__WACKY_SENDMAIL_VERSION} | ${__IPLANET_MESSAGING_SERVER} | ${__HOTMAIL_BAYDAV_MSGID} | ${__SYMPATICO_MSGID}"; +$__UNUSABLE_MSGID="${__LYRIS_EZLM_REMAILER} | ${__WACKY_SENDMAIL_VERSION} | ${__IPLANET_MESSAGING_SERVER} | ${__HOTMAIL_BAYDAV_MSGID} | ${__SYMPATICO_MSGID}"; +$__FORGED_OE="${__OE_MUA} & !{__OE_MSGID_1 & !${__OE_MSGID_2} & !{__UNUSABLE_MSGID}"; +$__OUTLOOK_DOLLARS_MUA="X-Mailer=/^Microsoft Outlook(?: 8| CWS, Build 9|, Build 10)\./H"; +$__OUTLOOK_DOLLARS_OTHER="Message-Id=/^\!\~\!/mH"; +$__VISTA_MSGID="Message-Id=/^[A-F\d]{32}\@\S+$/mH"; +$__IMS_MSGID="Message-Id=/^[A-F\d]{36,40}\@\S+$/mH"; +$__FORGED_OUTLOOK_DOLLARS="${__OUTLOOK_DOLLARS_MUA} & !${__OE_MSGID_2} & !${__OUTLOOK_DOLLARS_OTHER} & !${__VISTA_MSGID} & !${__IMS_MSGID} & !${__UNUSABLE_MSGID}"; +$__FMO_EXCL_O3416="X-Mailer=/^Microsoft Outlook, Build 10.0.3416$/H"; +$__FMO_EXCL_OE3790="X-Mailer=/^Microsoft Outlook Express 6.00.3790.3959$/H"; +$FORGED_MUA_OUTLOOK="(${__FORGED_OE} | ${__FORGED_OUTLOOK_DOLLARS}) & !${__FMO_EXCL_O3416} & !${__FMO_EXCL_OE3790} & !${__VISTA_MSGID}"; + +$__SANE_MSGID="Message-Id=/^[^<>\\ \t\n\r\x0b\x80-\xff]+\@[^<>\\ \t\n\r\x0b\x80-\xff]+\s*$/mH"; 
+$__MSGID_COMMENT="Messagr-Id=/\(.*\)/mH"; +$INVALID_MSGID="${__HAS_MID} & !(${__SANE_MSGID} | ${__MSGID_COMMENT})"; +$HTML_MIME_NO_HTML_TAG="${MIME_HTML_ONLY} & !${__TAG_EXISTS_HTML}"; +$__CD="header_exists(Content-Disposition)"; +$__CTE="header_exists(Content-Transfer-Encoding)"; +$__CT="header_exists(Content-Type)"; +$__MIME_VERSION="header_exists(MIME-Version)"; +#$__CT_TEXT_PLAIN="Content-Type=/^text\/plain\b/iH"; +$__CT_TEXT_PLAIN="content_type_is_type(text) & content_type_is_subtype(plain)"; +$MIME_HEADER_CTYPE_ONLY="!${__CD} & !${__CTE} & ${__CT} & !${__MIME_VERSION} & !${__CT_TEXT_PLAIN}"; + +$__HAS_MSMAIL_PRI="header_exists(X-MSMail-Priority)"; +$__HAS_MIMEOLE="header_exists(X-MimeOLE)"; +$__HAS_SQUIRRELMAIL_IN_MAILER="X-Mailer=/SquirrelMail\b/H"; +$MISSING_MIMEOLE="${__HAS_MSMAIL_PRI} & !${__HAS_MIMEOLE} & !${__HAS_SQUIRRELMAIL_IN_MAILER}"; +$__MSGID_DOLLARS_OK="Message-Id=/[0-9a-f]{4,}\$[0-9a-f]{4,}\$[0-9a-f]{4,}\@\S+/Hr"; +$__MIMEOLE_MS="X-MimeOLE=/^Produced By Microsoft MimeOLE/H"; +$__RCVD_WITH_EXCHANGE="Received=/with Microsoft Exchange Server/H"; +$RATWARE_MS_HASH="${__MSGID_DOLLARS_OK} & !${__MIMEOLE_MS} & !${__RCVD_WITH_EXCHANGE}"; +$STOX_REPLY_TYPE="Content-Type=/text\/plain; .* reply-type=original/H"; +$__FHELO_VERIZON="X-Spam-Relays-Untrusted=/^[^\]]+ helo=[^ ]+verizon\.net /iH"; +$__FHOST_VERIZON="X-Spam-Relays-Untrusted=/^[^\]]+ rdns=[^ ]+verizon\.net /iH"; +$FM_FAKE_HELO_VERIZON="${__FHELO_VERIZON} & !${__FHOST_VERIZON}"; +$__AT_YAHOO_MSGID="Message-Id=/\@yahoo\.com\b/iH"; +$__FROM_YAHOO_COM="From=/\@yahoo\.com\b/iH"; +$FORGED_MSGID_YAHOO="${__AT_YAHOO_MSGID} & !${__FROM_YAHOO_COM}"; + +$__THEBAT_MUA_V1="X-Mailer=/^The Bat! 
\(v1\./H"; +$__CTYPE_HAS_BOUNDARY="Content-Type=/boundary/iH"; +$__BAT_BOUNDARY="Content-Type=/boundary=\"?-{10}/H"; +$__MAILMAN_21="X-Mailman-Version=/\d/H"; +$__DOUBLE_IP_SPAM_1="Received=/from \[\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\] by \d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3} with/H"; +$__DOUBLE_IP_SPAM_2="Received=/from\s+\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\s+by\s+\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3};/H"; +$FORGED_MUA_THEBAT_BOUN="${__THEBAT_MUA_V1} & ${__CTYPE_HAS_BOUNDARY} & !${__BAT_BOUNDARY} & !${__MAILMAN_21}"; +$RCVD_DOUBLE_IP_SPAM="${__DOUBLE_IP_SPAM_1} | ${__DOUBLE_IP_SPAM_2}"; + +$__REPTO_QUOTE="Reply-To=/\".*\"\s*\</H"; +$__FROM_YAHOO_COM="From=/\@yahoo\.com\b/iH"; +$__AT_YAHOO_MSGID="Message-Id=/\@yahoo\.com\b/iH"; +$REPTO_QUOTE_YAHOO="${__REPTO_QUOTE} & (${__FROM_YAHOO_COM} | ${__AT_YAHOO_MSGID})"; + + +$__XM_GNUS="X-Mailer=/^Gnus v/H"; +$__XM_MSOE5="X-Mailer=/^Microsoft Outlook Express 5/H"; +$__XM_MSOE6="X-Mailer =~ /^Microsoft Outlook Express 6/H"; +$__XM_MOZ4="X-Mailer=/^Mozilla 4/H"; +$__XM_SKYRI="X-Mailer=/^SKYRiXgreen/H"; +$__XM_WWWMAIL="X-Mailer=/^WWW-Mail \d/H"; +$__UA_GNUS="User-Agent=/^Gnus/H"; +$__UA_KNODE="User-Agent=/^KNode/H"; +$__UA_MUTT="User-Agent=/^Mutt/H"; +$__UA_PAN="User-Agent=/^Pan/H"; +$__UA_XNEWS="User-Agent=/^Xnews/H"; +$__NO_INR_YES_REF="${__XM_GNUS} | ${__XM_MSOE5} | ${__XM_MSOE6} | ${__XM_MOZ4} | ${__XM_SKYRI} | ${__XM_WWWMAIL} | ${__UA_GNUS} | ${__UA_KNODE} | ${__UA_MUTT} | ${__UA_PAN} | ${__UA_XNEWS}"; + +$__SUBJ_RE="Subject=/^R[eE]:/H"; +$__HAS_REF="header_exists(References)"; +$__MISSING_REF="!${__HAS_REF}"; +$FAKE_REPLY_C="${__SUBJ_RE} & ${__MISSING_REF} & ${__NO_INR_YES_REF}"; + diff --git a/conf/html.inc b/conf/html.inc new file mode 100644 index 000000000..997b3375a --- /dev/null +++ b/conf/html.inc @@ -0,0 +1,28 @@ +# HTML related rules + +$__MIME_HTML="content_type_is_type(text) & content_type_is_subtype(/.?html/)"; +$__TAG_EXISTS_HTML="/<html/iPr"; +$__TAG_EXISTS_HEAD="/<head>/iPr"; +$__TAG_EXISTS_META="/<meta /iPr"; 
+$__TAG_EXISTS_BODY="/<body/iPr"; +$FORGED_OUTLOOK_TAGS="!${__YAHOO_BULK} & ${__ANY_OUTLOOK_MUA} & ${__MIME_HTML} & !(${__TAG_EXISTS_HTML} & ${__TAG_EXISTS_HEAD} & ${__TAG_EXISTS_META} & ${__TAG_EXISTS_BODY})"; +$__HTML_LENGTH_1024_1536="has_content_part_len('text', 'html', 1024, 1536)"; +$__HTML_LINK_IMAGE="/<img /iPr"; +$HTML_SHORT_LINK_IMG_2="${__HTML_LENGTH_1024_1536} & ${__HTML_LINK_IMAGE}"; + +$__R_BGCOLOR="/BGCOLOR=/iM"; +$__R_FONT_COLOR="/font color=[\"']?\#FFFFFF[\"']?/iM"; +$R_WHITE_ON_WHITE="(!${__R_BGCOLOR} & ${__R_FONT_COLOR})"; +$R_NO_SPACE_IN_FROM="From=/\S<[-\w\.]+\@[-\w\.]+>/X"; +$R_FLASH_REDIR_IMGSHACK="/^(?:http:\/\/)?img\d{1,5}\.imageshack\.us\/\S+\.swf/U"; +$__R_RCVD_FROM_VALUEHOST="Received=/\sb0\.valuehost\.ru/H"; +$__R_CYR_PHONE="/8 \(\xD799\)/P"; + +$R_SPAM_FROM_VALUEHOST="${__R_RCVD_FROM_VALUEHOST} & ${__R_CYR_PHONE}"; + +$__R_MEGA_TABLE = "/<table border=\"0\" width=\"1\" height=\"30\">\n\s+?<tr>\n\s+?<td bgcolor=\"\#000000\"><\/td>\n\s+?<td><\/td>\n\s+?<td bgcolor=\"\#000000\"><\/td>\n\s+?<td><\/td>\n\s+?<td bgcolor=\"\#000000\"><\/td>\n\s+?<td><\/td>\n\s+?<td bgcolor=\"\#000000\"><\/td>\n\s+?<td><\/td>\n\s+?<td bgcolor=\"\#000000\"><\/td>\n\s+?<td><\/td>\n\s+?<td bgcolor=\"\#000000\"><\/td>\n\s+?<td><\/td>\n\s+?<td><\/td>\n\s+?<td><\/td>/Ps"; +$__R_GREEK_SYMBOLS = "/&\#(?:1293|261|1218|1197|1245|1187|1116|569|1117|267|1179|593|1008|1247|311||311|491)/P"; + +$MIME_HTML_ONLY="has_only_html_part()"; +$FORGED_OUTLOOK_HTML="!${__YAHOO_BULK} & ${__ANY_OUTLOOK_MUA} & ${MIME_HTML_ONLY}"; + diff --git a/conf/lotto.inc b/conf/lotto.inc new file mode 100644 index 000000000..78ffdea57 --- /dev/null +++ b/conf/lotto.inc @@ -0,0 +1,16 @@ +# Rules for lottery spam + + +$__R_LOTTO_FROM="From=/(?:lottery|News center|congratulation to you|NED INFO|BRITISH NATIONAL HEADQUATERS|MICROSOFT ON LINE SUPPORT TEAM|prize|online notification)/iH"; +$__R_LOTTO_SUBJECT="Subject=/(?:\xA3\d|pounds?|FINAL NOTIFICATION|FOR YOUR ATTENTION|File in Your 
Claims?|ATTN|prize|Claims requirement|amount|confirm|your e-mail address won|congratulations)/iH"; +$__R_LOTTO_BODY="/(?:won|winning|\xA3\d|pounds?|GBP|LOTTERY|awards|prize)/isrP"; +$__KAM_LOTTO1="/(e-?mail address (have emerged a winner|has won|attached to (ticket|reference)|was one of the ten winners)|random selection in our computerized email selection system)/isrP"; +$__KAM_LOTTO2="/((ticket|serial|lucky) number|secret pin ?code|batch number|reference number|promotion date)/isrP"; +$__KAM_LOTTO3="/(won|claim|cash prize|pounds? sterling)/isrP"; +$__KAM_LOTTO4="/(claims (officer|agent)|lottery coordinator|fiduciary (officer|agent)|fiduaciary claims)/isrP"; +$__KAM_LOTTO5="/(freelotto group|Royal Heritage Lottery|UK National (Online)? Lottery|U\.?K\.? Grand Promotions|Lottery Department UK|Euromillion Loteria|Luckyday International Lottery|International Lottery)/isrP"; +$__KAM_LOTTO6="/(Dear Lucky Winner|Winning Notification|Attention:Winner|Dear Winner)/isrP"; +$__KAM_LOTTO7="Subject=/(Your Lucky Day|(Attention:|ONLINE) WINNER)/iH"; +$KAM_LOTTO1="(${R_UNDISC_RCPT} | ${R_BAD_CTE_7BIT} | ${R_NO_SPACE_IN_FROM}) & regexp_match_number(3, ${__KAM_LOTTO1}, ${__KAM_LOTTO2}, ${__KAM_LOTTO3}, ${__KAM_LOTTO4}, ${__KAM_LOTTO5}, ${__KAM_LOTTO6}, ${__KAM_LOTTO7})"; +$R_LOTTO="(${R_UNDISC_RCPT} | ${R_BAD_CTE_7BIT} | ${R_NO_SPACE_IN_FROM}) & regexp_match_number(3, ${__KAM_LOTTO1}, ${__KAM_LOTTO2}, ${__KAM_LOTTO3}, ${__KAM_LOTTO4}, ${__KAM_LOTTO5}, ${__KAM_LOTTO6})"; + diff --git a/conf/surbl-whitelist.inc b/conf/surbl-whitelist.inc new file mode 100644 index 000000000..de0dfa41e --- /dev/null +++ b/conf/surbl-whitelist.inc @@ -0,0 +1,23 @@ +# +# $Id: rspamd-whitelist.conf,v 1.1 2009/06/11 12:25:37 dmx Exp $ +# +1gost.info +autorambler.ru +easylnk.com +google.ru +li.ru +list.ru +liveinternet.ru +msn.com +nod32.com +price.ru +rambler-co.ru +rambler.ru +subscribe.ru +tns-counter.ru +top4top.ru +wordpress.com +yandex.ru +yandex.net +youtube.com +walmart.com diff --git a/rspamc.pl.in 
b/rspamc.pl.in index a1f667511..e608abdbd 100755 --- a/rspamc.pl.in +++ b/rspamc.pl.in @@ -43,13 +43,13 @@ sub parse_config { my $ctrl = 0, $skip = 0; while (<CONF>) { - if ($_ =~ /control\s*{/i) { + if ($_ =~ /^.*type.*=.*controller.*$/i) { $ctrl = 1; } if ($ctrl && $_ =~ /}/) { $ctrl = 0; } - if ($_ =~ /lmtp\s*{/i || $_ =~ /delivery\s*{/i) { + if ($_ =~ /^.*type.*=.*(?:lmtp|delivery).*$/i) { $skip = 1; } if ($skip && $_ =~ /}/) { diff --git a/rspamd.conf.sample b/rspamd.conf.sample index 42cf9d2a5..23f31353a 100644 --- a/rspamd.conf.sample +++ b/rspamd.conf.sample @@ -2,10 +2,16 @@ # $Id$ # +.include @CMAKE_INSTALL_PREFIX@/etc/rspamd/drugs.inc +.include @CMAKE_INSTALL_PREFIX@/etc/rspamd/lotto.inc +.include @CMAKE_INSTALL_PREFIX@/etc/rspamd/fraud.inc +.include @CMAKE_INSTALL_PREFIX@/etc/rspamd/headers.inc +.include @CMAKE_INSTALL_PREFIX@/etc/rspamd/html.inc + # pidfile - path to pid file # Default: pidfile = /var/run/rspamd.pid -pidfile = "./rspamd.pid"; +pidfile = "/var/run/rspamd.pid"; worker { @@ -46,7 +52,7 @@ logging { # Log type can be: console, syslog and file log_type = console; # Log level can be: DEBUG, INFO, WARN and ERROR - log_level = DEBUG; + log_level = INFO; # Log facility specifies facility for syslog logging, see syslog (3) for details # log_facility = "LOG_MAIL"; @@ -60,61 +66,128 @@ statfile_pool_size = 40M; # Sample statfile definition -statfile { +#statfile { # Alias is used for learning and is used as symbol - alias = "test.spam"; + #alias = "test.spam"; # Pattern is path to file, can include %r - recipient name and %f - mail from value - pattern = "./test.spam"; + #pattern = "./test.spam"; # Weight in spam/ham classifier - weight = 1.0; + #weight = 1.0; # Size of this statfile class - size = 10M; + #size = 10M; # Tokenizer for this statfile # Deafault: osb-text - tokenizer = "osb-text"; -}; -statfile { - alias = "test.ham"; - pattern = "./test.ham"; - weight = -2.0; - size = 10M; -}; + #tokenizer = "osb-text"; +#}; +#statfile { + #alias = 
"test.ham"; + #pattern = "./test.ham"; + #weight = -2.0; + #size = 10M; +#}; # Factors coefficients factors { - "SURBL_MULTI" = 10.5; - "winnow" = 5.5; -}; + # SURBL's + "SC_SURBL_MULTI" = 5.5; + "WS_SURBL_MULTI" = 5.5; + "PH_SURBL_MULTI" = 5.5; + "OB_SURBL_MULTI" = 5.5; + "AB_SURBL_MULTI" = 5.5; + "JP_SURBL_MULTI" = 5.5; + "RAMBLER_URIBL" = 10.5; + + # Regexp factors + "R_TINYURL" = 2; + "R_FREE_HOSTING" = 4; + "R_FREE_HOSTING_NAROD" = 3; + "R_WWW_EKONF_COM" = 10; + "SUBJECT_NEEDS_ENCODING" = 1; + "R_POCHTA_RU" = 10; + "R_BAD_CTE_7BIT" = 6; + "R_UNDISC_RCPT" = 5; + "MISSING_MID" = 3; + "R_RCVD_SPAMBOTS" = 3; + "R_TO_SEEMS_AUTO" = 3; + "R_MISSING_CHARSET" = 5; + "R_SAJDING" = 8; + "R_FORGED_MPOP_WEBMAIL" = 8; + "R_WHITE_ON_WHITE" = 9; + "R_NO_SPACE_IN_FROM" = 3; + "R_FLASH_REDIR_IMGSHACK" = 10; + "R_SPAM_FROM_VALUEHOST" = 10; + "R_SPAM_FROM_MTU" = 8; + "R_SPAM_FROM_ONO" = 10; + "R_SPAM_FROM_VERSATEL" = 10; + "R_SPAM_FROM_LIBERO" = 10; + "R_FAKE_OUTLOOK"= 8; + "R_FAKE_THEBAT"= 8; + "R_LOTTO" = 5; + "KAM_LOTTO1" = 7; + "FORGED_OUTLOOK_HTML" = 5; + "SUSPICIOUS_RECIPS" = 3.5; + "HTML_TAG_BALANCE_HEAD" = 5; + "SORTED_RECIPS" = 3.5; + "TRACKER_ID" = 3.843; + "ADVANCE_FEE_2" = 3.300; + "ADVANCE_FEE_3" = 2.121; + "FORGED_MUA_OUTLOOK" = 3; + "FORGED_OUTLOOK_TAGS" = 2.099; + "HTML_SHORT_LINK_IMG_2" = 3; + "INVALID_MSGID" = 5; + "HTML_MIME_NO_HTML_TAG" = 2; + "MIME_HEADER_CTYPE_ONLY" = 2; + "MISSING_MIMEOLE" = 5; + "MISSING_SUBJECT" = 2; + "RATWARE_MS_HASH" = 2; + "STOX_REPLY_TYPE" = 1; + "FM_FAKE_HELO_VERIZON" = 2; + "FORGED_MSGID_YAHOO" = 2; + "FORGED_MUA_THEBAT_BOUN" = 2; + "RCVD_DOUBLE_IP_SPAM" = 2; + "REPTO_QUOTE_YAHOO" = 2; + "DRUGS_DIET" = 2; + "DRUGS_ERECTILE" = 2; + "DRUGS_ANXIETY" = 2; + "DRUGS_ANXIETY_EREC" = 2; + "DRUGS_MANYKINDS" = 2; + "FAKE_REPLY_C" = 6; + "MIME_HTML_ONLY" = 1; + + # Modules factors + "R_MIXED_CHARSET" = 5; + "R_BAD_EMAIL" = 10.5; +}; # Options for lmtp worker -worker { - type = "lmtp"; +#worker { + #type = "lmtp"; # Bind socket for lmtp 
interface - bind_socket = localhost:11335; + #bind_socket = localhost:11335; # Metric that is considered as main. If we have spam result on # this metric, lmtp delivery would be failed - metric = "default"; + #metric = "default"; # Number of lmtp workers - count = 1; -}; + #count = 1; +#}; #worker { -# type = "delivery"; + #type = "delivery"; # Path to delivery agent, %f is expanded as mail from address and %r # is expanded as recipient address # Expample: agent = "/usr/local/bin/procmail -f %f -d %r" -# agent = "/dev/null"; + #agent = "/dev/null"; # Bind socket for lmtp interface # Example: bind_socket = localhost:25 # Whether we should use lmtp for MTA delivery -# lmtp = no; + #lmtp = no; #}; # SURBL module params, note that single quotes are mandatory here .module 'surbl' { # Address to redirector in host:port format - redirector = "localhost:8080"; + #redirector = "localhost:8080"; # Connect timeout for redirector redirector_connect_timeout = "1s"; # IO timeout for redirector (may be usefull to set this value rather big) @@ -136,30 +209,80 @@ worker { # Metric for surbl module metric = "default"; # List of public known hostings (for which we should use 3 components of domain name instead of 2) - 2tld = "file:///some/path/file"; + 2tld = "file://@CMAKE_INSTALL_PREFIX@/etc/rspamd/2tld.inc"; # Whitelisted urls - whitelist = "file:///some/other/file"; + whitelist = "file://@CMAKE_INSTALL_PREFIX@/etc/rspamd/surbl-whitelist.inc"; }; - -$to_blah = "To=/\"blah@blah\"/H"; -$from_blah = "From=/blah@blah/H"; -$subject_blah = "Subject=/blah/H"; - .module 'regexp' { - BLAH_SYMBOL = "${to_blah} & !(${from_blah} | ${subject_blah})"; + R_TINYURL = "${R_TINYURL}"; + R_FREE_HOSTING = "${R_FREE_HOSTING}"; + R_WWW_EKONF_COM = "${R_WWW_EKONF_COM}"; + R_FREE_HOSTING_NAROD = "${R_FREE_HOSTING_NAROD}"; + SUBJECT_NEEDS_ENCODING = "${SUBJECT_NEEDS_ENCODING}"; + + R_POCHTA_RU = "${R_POCHTA_RU}"; + R_BAD_CTE_7BIT="${R_BAD_CTE_7BIT}"; + R_UNDISC_RCPT="${R_UNDISC_RCPT}"; + 
MISSING_MID="${MISSING_MID}"; + R_RCVD_SPAMBOTS="${R_RCVD_SPAMBOTS}"; + R_TO_SEEMS_AUTO="${R_TO_SEEMS_AUTO}"; + R_SAJDING="${R_SAJDING}"; + R_FORGED_MPOP_WEBMAIL="${R_FORGED_MPOP_WEBMAIL}"; + R_WHITE_ON_WHITE="${R_WHITE_ON_WHITE}"; + R_NO_SPACE_IN_FROM="${R_NO_SPACE_IN_FROM}"; + R_FLASH_REDIR_IMGSHACK="${R_FLASH_REDIR_IMGSHACK}"; + R_SPAM_FROM_VALUEHOST="${R_SPAM_FROM_VALUEHOST}"; + R_SPAM_FROM_MTU="${R_SPAM_FROM_MTU}"; + R_SPAM_FROM_ONO="${R_SPAM_FROM_ONO}"; + R_SPAM_FROM_VERSATEL="${R_SPAM_FROM_VERSATEL}"; + R_SPAM_FROM_LIBERO="${R_SPAM_FROM_LIBERO}"; + R_FAKE_OUTLOOK="${R_FAKE_OUTLOOK}"; + R_FAKE_THEBAT="${R_FAKE_THEBAT}"; + R_MISSING_CHARSET="${R_MISSING_CHARSET}"; + R_LOTTO="${R_LOTTO}"; + KAM_LOTTO1="${KAM_LOTTO1}"; + FORGED_OUTLOOK_HTML="${FORGED_OUTLOOK_HTML}"; + SUSPICIOUS_RECIPS="${SUSPICIOUS_RECIPS}"; + SORTED_RECIPS="${SORTED_RECIPS}"; + TRACKER_ID="${TRACKER_ID}"; + ADVANCE_FEE_2="${ADVANCE_FEE_2}"; + ADVANCE_FEE_3="${ADVANCE_FEE_3}"; + FORGED_MUA_OUTLOOK="${FORGED_MUA_OUTLOOK}"; + FORGED_OUTLOOK_TAGS="${FORGED_OUTLOOK_TAGS}"; + HTML_SHORT_LINK_IMG_2="${HTML_SHORT_LINK_IMG_2}"; + INVALID_MSGID="${INVALID_MSGID}"; + HTML_MIME_NO_HTML_TAG="${HTML_MIME_NO_HTML_TAG}"; + MIME_HEADER_CTYPE_ONLY="${MIME_HEADER_CTYPE_ONLY}"; + MISSING_MIMEOLE="${MISSING_MIMEOLE}"; + MISSING_SUBJECT="${MISSING_SUBJECT}"; + RATWARE_MS_HASH="${RATWARE_MS_HASH}"; + STOX_REPLY_TYPE="${STOX_REPLY_TYPE}"; + FM_FAKE_HELO_VERIZON="${FM_FAKE_HELO_VERIZON}"; + FORGED_MSGID_YAHOO="${FORGED_MSGID_YAHOO}"; + FORGED_MUA_THEBAT_BOUN="${FORGED_MUA_THEBAT_BOUN}"; + RCVD_DOUBLE_IP_SPAM="${RCVD_DOUBLE_IP_SPAM}"; + REPTO_QUOTE_YAHOO="${REPTO_QUOTE_YAHOO}"; + DRUGS_DIET="${DRUGS_DIET}"; + DRUGS_ERECTILE="${DRUGS_ERECTILE}"; + DRUGS_ANXIETY="${DRUGS_ANXIETY}"; + DRUGS_ANXIETY_EREC="${DRUGS_ANXIETY_EREC}"; + DRUGS_MANYKINDS="${DRUGS_MANYKINDS}"; + + FAKE_REPLY_C="${FAKE_REPLY_C}"; + MIME_HTML_ONLY="${MIME_HTML_ONLY}"; }; .module 'chartable' { metric = "default"; - symbold = "R_MIXED_CHARSET"; + 
symbol = "R_MIXED_CHARSET"; threshold = "0.1"; }; .module 'emails' { metric = "default"; - symbold = "R_BAD_EMAIL"; - blacklist = "file:///some/path/emails.lst"; + symbol = "R_BAD_EMAIL"; + #blacklist = "file:///some/path/emails.lst"; }; # If enables threat each regexp as raw regex and do not try to convert |