diff --git CREDITS CREDITS index 5539d7790..ca8c66309 100644 --- CREDITS +++ CREDITS @@ -1,3 +1,5 @@ +Copyright (C) 2015 The Apache Software Foundation + Project Management Committee (PMC): This list contains PMC members in alphabetical order (and their Amazon @@ -7,21 +9,22 @@ Project Management Committee (PMC): non-confidential correspondence. - Karsten Bräckelmann - - Alex Broens + - Alex Broens http://www.msf.org/en/donate - Adam Katz - - Sidney Markowitz http://www.amazon.com/o/registry/1WJ8J7403BLTS + - Sidney Markowitz - Mark Martinec - Kevin A. McGrail - Michael Parker http://www.amazon.com/o/registry/10BBAR2M03T6F + - Joe Quinn Committers: This list contains committers in alphabetical order (and their Amazon wishlists). + - Bill Cole - John Hardin - Henrik Krohns - - Joe Quinn PMC Emeritus & inactive committers: @@ -33,7 +36,7 @@ PMC Emeritus & inactive committers: - Duncan Findlay - PMC Emeritus - Tony Finch - Steve Freegard - - Craig Hughes + - Craig Hughes - Matt Kettler - PMC Emeritus - Justin Mason - Creator & PMC Emeritus - Robert Menschel @@ -97,9 +100,9 @@ Major contributions: to avoid losing mail from spamc; BSMTP and -e support; tracking of number of spamd processes; several other mods. - - Kristian Koehntopp : LDAP support. + - Kristian Köhntopp, : LDAP support. - - Matthias Leisi : Mail::SpamAssassin::Plugin::ASN + - Matthias Leisi, : Mail::SpamAssassin::Plugin::ASN plugin. - Daniel Lemke, : many Windows support fixes @@ -291,14 +294,14 @@ Patch submitters: - Mike Nolan, : SunOS build directions - - Martin O"stlund, : Slackware 9.0 rc-script for spamd. + - Martin Östlund, : Slackware 9.0 rc-script for spamd. - Tomasz Ostrowski, : perl 5.005 support. - Henning P. Schmiedehausen, : adding ? to shell globs. - - Francesco Potorti, : documentation improvements + - Francesco Potortì, : documentation improvements - Alan Premselaar, : rule suggestions. 
@@ -369,7 +372,7 @@ ASF Sponsorship: Resources: Thanks to our previous mirrors: Peregrine Computer Consultants Corporation - (previously Peregrine Hardware, Inc.) and Kevin A. McGrail, Jeremy Zawodny, + (previously Peregrine Hardware, Inc.) and Kevin A. McGrail, Jeremy Zawodny, Mark Reynolds, RedIRIS, Hagen Herrschaft, and PlanetMirror. Thanks to Mark Reynolds of Reynolds Technology (http://www.reynolds.net.au/) @@ -391,6 +394,6 @@ Resources: Finally: Thanks to James Thompson at cPanel Inc who designed our new logo in 2014 and - to Christian Rauh, winner of the SpamAssassin logo contest, who created, + to Christian Rauh, winner of the SpamAssassin logo contest, who created, designed, and illustrated our first Apache SpamAssassin logo. diff --git MANIFEST MANIFEST index edef111da..6c8d08b18 100644 --- MANIFEST +++ MANIFEST @@ -124,7 +124,6 @@ lib/Mail/SpamAssassin/Timeout.pm lib/Mail/SpamAssassin/Util.pm lib/Mail/SpamAssassin/Util/DependencyInfo.pm lib/Mail/SpamAssassin/Util/Progress.pm -lib/Mail/SpamAssassin/Util/RegistrarBoundaries.pm lib/Mail/SpamAssassin/Util/ScopedTimer.pm lib/Mail/SpamAssassin/Util/TieOneStringHash.pm lib/spamassassin-run.pod @@ -143,6 +142,8 @@ rules/v330.pre rules/v340.pre rules/v341.pre rules/20_aux_tlds.cf +rules-extras/README.txt +rules-extras/10_uridnsbl_skip_financial.cf sa-awl.raw sa-check_spamd.raw sa-compile.raw @@ -338,6 +339,8 @@ t/data/nice/spf1 t/data/nice/spf2 t/data/nice/spf3 t/data/nice/spf3-received-spf +t/data/nice/unicode1 +t/data/nice/unicode2 t/data/reporterplugin.pm t/data/spam/001 t/data/spam/002 @@ -417,6 +420,7 @@ t/get_all_headers.t t/get_headers.t t/gtube.t t/hashcash.t +t/header_utf8.t t/html_colors.t t/html_obfu.t t/html_utf8.t @@ -511,8 +515,6 @@ t/spamd_report_ifspam.t t/spamd_sql_prefs.t t/spamd_ssl.t t/spamd_ssl_accept_fail.t -t/spamd_ssl_tls.t -t/spamd_ssl_v3.t t/spamd_stop.t t/spamd_symbols.t t/spamd_syslog.t diff --git Makefile.PL Makefile.PL index 33ca2af01..dbb0de5e6 100644 --- Makefile.PL +++ Makefile.PL 
@@ -72,6 +72,7 @@ my @ATT_KEYS = ( 'BUILD_SPAMC' , # Set to 'no' to skip build of spamc. + 'BUILD_SPAMD', # Set to 'no' to skip build of spamd. 'ENABLE_SSL', # Set to 'yes' to build spamc with SSL support. 'CONTACT_ADDRESS', # To not ask for the contact address, use this. ); @@ -105,6 +106,7 @@ sub yesno { my %opt = ( 'build_spamc' => undef, + 'build_spamd' => undef, 'enable_ssl' => undef, 'contact_address' => undef, 'destdir' => undef, @@ -267,9 +269,19 @@ unless($Config{installman1dir} # Windows platforms need some adjustments if (RUNNING_ON_WINDOWS) { - # Don't build spamd - delete $makefile{EXE_FILES}{'spamd/spamd.raw'}; - delete $makefile{MAN1PODS}{'spamd/spamd'}; + # Building spamd is optional on Windows because it still is somewhat + # experimental. + if (!defined $opt{'build_spamd'}) { + $opt{'build_spamd'} = bool(prompt( + "Build spamd.exe (experimental on windows platforms)? (y/n)", + 'n')); + } else { + $opt{'build_spamd'} = bool($opt{'build_spamd'}); + } + if (!$opt{'build_spamd'}) { + delete $makefile{EXE_FILES}{'spamd/spamd.raw'}; + delete $makefile{MAN1PODS}{'spamd/spamd'}; + } # building spamc is optional under Win32 because not everyone has compiler if (!defined $opt{'build_spamc'}) { $opt{'build_spamc'} = bool(prompt( @@ -1057,7 +1069,7 @@ spamc_has_moved: $(NOECHO) echo "*** spamc now has its own directory: $(TARGET) is $(SOURCE)" $(NOECHO) echo "***" $(PERL) -MFile::Spec -MFile::Copy \ - -e "copy(q{$(SOURCE)}, q{$(TARGET)});" + -e "copy(q[$(SOURCE)], q{$(TARGET)});" spamc/libspamc.so: $(SPAMC_MAKEFILE) $(LIBSPAMC_SRC) $(MAKE_SPAMC) $@ @@ -1093,14 +1105,14 @@ qmail/qmail-spamc$(EXE_EXT): spamc/qmail-spamc$(EXE_EXT) conf__install: -$(MKPATH) $(B_CONFDIR) - $(PERL) -MFile::Copy -e "copy(q{rules/local.cf}, q{$(B_CONFDIR)/local.cf}) unless -f q{$(B_CONFDIR)/local.cf}" - $(PERL) -MFile::Copy -e "copy(q{rules/init.pre}, q{$(B_CONFDIR)/init.pre}) unless -f q{$(B_CONFDIR)/init.pre}" - $(PERL) -MFile::Copy -e "copy(q{rules/v310.pre}, 
q{$(B_CONFDIR)/v310.pre}) unless -f q{$(B_CONFDIR)/v310.pre}" - $(PERL) -MFile::Copy -e "copy(q{rules/v312.pre}, q{$(B_CONFDIR)/v312.pre}) unless -f q{$(B_CONFDIR)/v312.pre}" - $(PERL) -MFile::Copy -e "copy(q{rules/v320.pre}, q{$(B_CONFDIR)/v320.pre}) unless -f q{$(B_CONFDIR)/v320.pre}" - $(PERL) -MFile::Copy -e "copy(q{rules/v330.pre}, q{$(B_CONFDIR)/v330.pre}) unless -f q{$(B_CONFDIR)/v330.pre}" - $(PERL) -MFile::Copy -e "copy(q{rules/v340.pre}, q{$(B_CONFDIR)/v340.pre}) unless -f q{$(B_CONFDIR)/v340.pre}" - $(PERL) -MFile::Copy -e "copy(q{rules/v341.pre}, q{$(B_CONFDIR)/v341.pre}) unless -f q{$(B_CONFDIR)/v341.pre}" + $(PERL) -MFile::Copy -e "copy(q[rules/local.cf], q{$(B_CONFDIR)/local.cf}) unless -f q{$(B_CONFDIR)/local.cf}" + $(PERL) -MFile::Copy -e "copy(q[rules/init.pre], q{$(B_CONFDIR)/init.pre}) unless -f q{$(B_CONFDIR)/init.pre}" + $(PERL) -MFile::Copy -e "copy(q[rules/v310.pre], q{$(B_CONFDIR)/v310.pre}) unless -f q{$(B_CONFDIR)/v310.pre}" + $(PERL) -MFile::Copy -e "copy(q[rules/v312.pre], q{$(B_CONFDIR)/v312.pre}) unless -f q{$(B_CONFDIR)/v312.pre}" + $(PERL) -MFile::Copy -e "copy(q[rules/v320.pre], q{$(B_CONFDIR)/v320.pre}) unless -f q{$(B_CONFDIR)/v320.pre}" + $(PERL) -MFile::Copy -e "copy(q[rules/v330.pre], q{$(B_CONFDIR)/v330.pre}) unless -f q{$(B_CONFDIR)/v330.pre}" + $(PERL) -MFile::Copy -e "copy(q[rules/v340.pre], q{$(B_CONFDIR)/v340.pre}) unless -f q{$(B_CONFDIR)/v340.pre}" + $(PERL) -MFile::Copy -e "copy(q[rules/v341.pre], q{$(B_CONFDIR)/v341.pre}) unless -f q{$(B_CONFDIR)/v341.pre}" data__install: diff --git build/README build/README index 76514b31a..f0e920e51 100644 --- build/README +++ build/README @@ -7,6 +7,10 @@ SPAMASSASSIN DEVELOPMENT SNAPSHOT PROCEDURE ssh spamassassin.zones.apache.org cd [checkedoutdir] + NOTE: SpamAssassin.Zones is a crashed box replaced by SpamAssassin-vm. 
+ A copy of the sabuildtools is in + /root/zonestorage-restore/spamassassin/export/home/kmcgrail/sabuildtools + - ensure the required code and data is available for the build scripts: $HOME/sabuildtools @@ -27,6 +31,12 @@ SPAMASSASSIN DEVELOPMENT SNAPSHOT PROCEDURE - by default, they're written to ~/public_html/devel/ . Copy them to wherever you want, yourself. +- KAM has included all of these on one of his devel systems with perl + 5.8.6 (the earliest supported version of Perl as of 4/28/2015). One + note is that you need webmake 2.4 from + http://webmake.taint.org/devel/HTML-WebMake-2.4.tar.gz + + Also, a copy is mirrored at www.pccc.com/downloads/SpamAssassin SPAMASSASSIN RELEASE PROCEDURE @@ -124,7 +134,14 @@ SPAMASSASSIN RELEASE PROCEDURE 3.1.0 work (created 3.0 branch); replace with the correct rev number for the point you want to start at if different. I used r1149751 to produce the first 3.4.0-pre release, for example. r1567124 is the - 3.4.0 release tag revision. + 3.4.0 release tag revision. r1676613 is the 3.4.1 release tag + revision. + + To find the release tag revision, use the tags website, i.e.: + https://svn.apache.org/repos/asf/spamassassin/tags/ + + NOTE: 3.4.1 is currently on trunk and will be branched to 3.4 because + 3.4 branch was not used. 
- Exclude some automated changes from the logs: @@ -178,7 +195,7 @@ SPAMASSASSIN RELEASE PROCEDURE - For a maintainance release (x.y.1, x.y.2): - vers=3_1_7 + vers=3_4_2 Then run: @@ -189,7 +206,7 @@ SPAMASSASSIN RELEASE PROCEDURE - For a trunk release (x.y.0): - vers=3_4_0 + vers=3_4_1 repo=https://svn.apache.org/repos/asf/spamassassin svn delete -m "replaced old tag" $repo/tags/spamassassin_release_$vers @@ -223,7 +240,7 @@ SPAMASSASSIN RELEASE PROCEDURE #These should be set from the previous command unless you are on a # different server - #vers=3_3_0_alpha_2 + #vers=3_4_1 #repo=https://svn.apache.org/repos/asf/spamassassin rm -rf ~/relbuild svn co $repo/tags/spamassassin_release_$vers ~/relbuild @@ -246,7 +263,8 @@ SPAMASSASSIN RELEASE PROCEDURE module, you didn't update your $PATH. - run "./build/repackage_latest_update_rules" to repackage the - latest file on updates.spamassassin.org. + latest rules file for the purpose of publishing a starter rule file + for people who can't run sa-update: PATH=$HOME/sabuildtools/bin:$PATH ./build/repackage_latest_update_rules @@ -258,13 +276,13 @@ SPAMASSASSIN RELEASE PROCEDURE links, fixing the MD5 and SHA1 checksums in this mail, and summarising the important changes from the Changes file. - cp build/announcements/3.3.2.txt build/announcements/3.4.0.txt + cp build/announcements/3.4.0.txt build/announcements/3.4.1.txt svn add !$ vi !$ NOTE: Here's a quick example to concat the MD5 or SHA1 checksums: - ls *3.4.1*rc2* | grep md5 | xargs cat -- - ls *3.4.1*rc2* | grep sha1 | xargs cat -- + ls *3.4.1.* | grep md5 | xargs cat -- + ls *3.4.1.* | grep sha1 | xargs cat -- - If there are any issues to note, make sure to edit the UPGRADE file. Every x.x.0 release at a minimum should include a section. @@ -272,12 +290,12 @@ SPAMASSASSIN RELEASE PROCEDURE - Check the README file for any items to change. 
- (for any rc, prerelease, or full release) Place the tarballs in a - discrete location (discrete means not linked from the "downloads" page + discreet location (discreet means not linked from the "downloads" page of the website, but included in the vote mail) and request a vote on the development mailing list to make the release. Post the URL, md5sums/sha1sums, and proposed release announcement mail to the dev list. The default location -- ~/public_html/devel/ , or - http://people.apache.org/~jm/devel/ , qualifies as "discrete". + http://people.apache.org/~jm/devel/ , qualifies as "discreet". If you build on the zone, you will need to scp them over: @@ -323,7 +341,8 @@ SPAMASSASSIN RELEASE PROCEDURE branch, off the trunk. repo=https://svn.apache.org/repos/asf/spamassassin - svn copy $repo/tags/spamassassin_release_3_4_0 $repo/branches/3.4 -m 'Creating 3.4 branch' + svn copy $repo/tags/spamassassin_release_3_4_1 $repo/branches/3.4 \ + -m 'Creating 3.4 branch based on 3.4.1 so that trunk can go to 4.0' "trunk" is SVN's concept of head. Typically, our branches are named for their minor version number. In the example above, 3.4 is the @@ -343,24 +362,26 @@ SPAMASSASSIN RELEASE PROCEDURE - lib/Mail/SpamAssassin.pm + +- In build/mkupdates/run_part2 change versions="X.Y.Z" to the new + development version (A.B.C). + + - [X.Y.0 RELEASES ONLY]: NOTE: This may change, make sure this info is still accurate before changing this (preferrably find out BEFORE doing an X.Y.0 release. Update the mkupdates stuff: - - In build/mkupdates/run_part2 change versions="X.Y.0" to the new - development version (X.Y+1.0). - - - Create an empty file for the new version number in - /var/named/updates.dev.spamassassin.org.d/ on the zone and chown to - updatesd and chgrp to others. 
- - NOTE: I don't believe this does anything KAM: 2014-02-11 +# - Create an empty file for the new version number in +# /var/named/updates.dev.spamassassin.org.d/ on the zone and chown to +# updatesd and chgrp to others. +# +# NOTE: I don't believe this does anything KAM: 2014-02-11 and the +# directory doesn't exist. - Add "$INCLUDE /var/named/updates.spamassassin.org.d/X.Y+1.0" to the - spamassassin.org zone file in spamassassin/dns/ + /var/named/spamassassin.org zone file OR use a CNAME (see below) - NOTE: I don't believe this is right. Per bug 6644, using a CNAME for - 3.4.1 to 3.4.0 - KAM: 2014-02-11 + NOTE: Per bug 6644, using a CNAME for 3.4.1, 3.4.2, 4.0.0 to 3.3.2 - commit the changes, update the zone (if not doing above on the zone - NOTE: See /var/named/README) and tick the zone file using build/mkupdates/tick_zone_serial @@ -388,7 +409,7 @@ SPAMASSASSIN RELEASE PROCEDURE ./update-rules-3.3 3.4 NOTE: Is this needed with our rule update process? not convinced it - is... + is... 2015-04-28 - publish the tarballs @@ -461,7 +482,8 @@ SPAMASSASSIN RELEASE PROCEDURE WARNING: if you're moving to a new major release, x.y.0, you need to edit 'build/update_website_docs' beforehand and set the "vers" line. 
And if - you are running on a server other than buildbot, the PERL and WEBDIR + you are running on a server other than buildbot (NOTE: this works on the + spamassassin-vm.apache.org box), the PERL and WEBDIR vars may need tweaking: cd /var/www/buildbot.spamassassin.org/staging/website @@ -469,7 +491,10 @@ SPAMASSASSIN RELEASE PROCEDURE svn delete --force full/3.4.x svn commit -m "removing old doc tree from website" full - cd [checkedoutdir] + #Checkout the current release + cd /tmp + svn checkout http://svn.apache.org/repos/asf/spamassassin/tags/spamassassin_release_3_4_1/ release + cd /tmp/release build/update_website_docs cd /var/www/buildbot.spamassassin.org/staging/website @@ -487,7 +512,7 @@ SPAMASSASSIN RELEASE PROCEDURE repo=https://svn.apache.org/repos/asf/spamassassin svn delete -m "updating for new release" $repo/tags/spamassassin_current_release_3.4.x - svn copy -m "updating for new release" $repo/tags/spamassassin_release_3_4_0 $repo/tags/spamassassin_current_release_3.4.x + svn copy -m "updating for new release" $repo/tags/spamassassin_release_3_4_1 $repo/tags/spamassassin_current_release_3.4.x - upload release .tar.gz (not .bz2) tarball to CPAN at http://pause.cpan.org/: @@ -511,7 +536,7 @@ SPAMASSASSIN RELEASE PROCEDURE NOTE: you must send this mail with a "From:" address @apache.org, otherwise it'll be bounced by the ASF's custom spam filtering - rules. + rules. See [1] below for more requirements for the announce email. # IGNORING 2014-02-11 - NOT BOTHERING WITH FRESHMEAT - TOO OUT OF DATE AND # NOW FREECODE @@ -525,6 +550,9 @@ SPAMASSASSIN RELEASE PROCEDURE # # http://freshmeat.net/projects/spamassassin/releases/new +- SourceForge - We have an account with SF and haven't published to it in + some time. Need to consider doing so. + - Approve the posting to the announce list (the list admins will get a mail indicating how to do this.) 
@@ -545,6 +573,7 @@ SPAMASSASSIN RELEASE PROCEDURE documented in this README: - Reviewed the CREDITS to update inactive Committers + - Reviewed the CREDITS to update Copyright - Reviewed the website and wiki for less obvious items like: - updating things that refered to 3.3 to 3.4 such as SVN Checkout instructions. @@ -553,7 +582,8 @@ SPAMASSASSIN RELEASE PROCEDURE - Reviewed the Project Branding Report Checklist -- Issues not resolved with the release of 3.4.0 2014-02-11 +- Issues not resolved with the release of 3.4.0 2014-02-11 and still not + resolved with the release of 3.4.1 2015-04-28 - We updated to 3.4.0 and 3.4.1 is the current trunk version. Need to confirm what needs to change on the updatesd scripts to make this @@ -563,4 +593,42 @@ SPAMASSASSIN RELEASE PROCEDURE running masscheck for that version and release the updates with proper ifplugin loops for any new features? + +------------- +[1] + +Hello PMCs --a quick reminder that messages sent to announce@apache.org +feature prominently in the weekly Apache News Round-ups [1]. + +In order for your announcement to be happily moderated through, don't forget +to: + +0) write "[ANNOUNCE]", "[ANN]", or "[SECURITY]" in the subject line as +appropriate + +1) send the email in PLAINTEXT --all else will be rejected + +2) include the appropriate URL for downloading the release or more +information regarding the announcement + +3) include the DOAP! Whilst you may know what your project is, there's a +chance that others don't, particularly those with funky names/acronyms. +Hone your message and polish that one-liner! + +3a) speaking of DOAP, relationships are nice --does your project +involve/play with other technologies? If so, state the relationship. + +4) include a way for folks to learn more. Link to the project home page or +a mailing list or some other relevant resource. 
+ +5) personalize your closing --whether you choose to sign off using your +name or (on behalf of) the collective PMC, doing something friendly +reinforces the "community" aspect of the ASF. + + +Thanks so much, +Sally + + +[1] https://blogs.apache.org/foundation/date/20150612 // vim:tw=74: diff --git build/announcements/3.4.1.txt build/announcements/3.4.1.txt index b9f50cc10..80d827794 100644 --- build/announcements/3.4.1.txt +++ build/announcements/3.4.1.txt @@ -527,17 +527,17 @@ http://spamassassin.apache.org/downloads.cgi md5sum of archive files: -55f934b54e7b56fb89b491b289b10e8d Mail-SpamAssassin-3.4.1-rc2.tar.bz2 -3c9a10d08731877a29499769e75e35b3 Mail-SpamAssassin-3.4.1-rc2.tar.gz -8d01b76eb532499f590a8314f5a5f938 Mail-SpamAssassin-3.4.1-rc2.zip -a5e4a3f174cc5ed8b18077f9a6ca0c47 Mail-SpamAssassin-rules-3.4.1-rc2.r1670273.tgz +0db5d27d7b782ff5eadee12b95eae84c Mail-SpamAssassin-3.4.1.tar.bz2 +76eca1f38c11635d319e62c26d5b034b Mail-SpamAssassin-3.4.1.tar.gz +2bbbf838d722c006b5ab97db167e4b22 Mail-SpamAssassin-3.4.1.zip +4a1cbafbee2d0ae8c4f2f9ac05b4b3aa Mail-SpamAssassin-rules-3.4.1.r1675274.tgz sha1sum of archive files: -0efbde52cdc882461d2e3a5d9ed977499268cadd Mail-SpamAssassin-3.4.1-rc2.tar.bz2 -bb2eb6a3c7a79ea1cc2a53abf668b4684f39b653 Mail-SpamAssassin-3.4.1-rc2.tar.gz -318ac84a1f17e19229d9813078a12e7f4825dee3 Mail-SpamAssassin-3.4.1-rc2.zip -680c231efd0cfca69e665445a9472d0b7bb3a8fe Mail-SpamAssassin-rules-3.4.1-rc2.r1670273.tgz +ddd62c5ab376554b0110b8fdc84f3508ea590659 Mail-SpamAssassin-3.4.1.tar.bz2 +e7b342d30f4983f70f4234480b489ccc7d2aa615 Mail-SpamAssassin-3.4.1.tar.gz +4fae06059eeffaba43d7779f764ecda52e31af85 Mail-SpamAssassin-3.4.1.zip +fcbcbf767f8c0b1b2ce2c3be4010cf6130f826b9 Mail-SpamAssassin-rules-3.4.1.r1675274.tgz Note that the *-rules-*.tar.gz files are only necessary if you cannot, or do not wish to, run "sa-update" after install to download the latest diff --git build/mkupdates/run_part2 build/mkupdates/run_part2 index 08aa0f88d..7f032bf3e 100755 
--- build/mkupdates/run_part2 +++ build/mkupdates/run_part2 @@ -38,7 +38,7 @@ dnsdir=/var/named/updates.spamassassin.org.d # soadir=/var/named/spamassassin.org.d -versions="3.4.1" +versions="3.4.2" # --------------------------------------------------------------------------- # TODO: if/when this becomes fully automatic, this commit will be superfluous diff --git contrib/HOWTO.Bayes-Redis/SA-rule/bayes_redis.cf contrib/HOWTO.Bayes-Redis/SA-rule/bayes_redis.cf index 34858b573..420701ae7 100644 --- contrib/HOWTO.Bayes-Redis/SA-rule/bayes_redis.cf +++ contrib/HOWTO.Bayes-Redis/SA-rule/bayes_redis.cf @@ -31,7 +31,7 @@ bayes_store_module Mail::SpamAssassin::BayesStore::Redis # the Redis server/port will not be reachable from the "outside". # See Redis docs for authentication syntax -bayes_sql_dsn server=192.168.1.2:6379 +bayes_sql_dsn server=192.168.1.2:6379,database=0 use_bayes 1 use_bayes_rules 1 diff --git lib/Mail/SpamAssassin.pm lib/Mail/SpamAssassin.pm index 9e5064c81..4c76a71f7 100644 --- lib/Mail/SpamAssassin.pm +++ lib/Mail/SpamAssassin.pm @@ -61,7 +61,7 @@ or the C/C tools provided. package Mail::SpamAssassin; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; require 5.006_001; @@ -94,8 +94,8 @@ use vars qw{ @site_rules_path }; -$VERSION = "3.004001"; # update after release (same format as perl $]) -#$IS_DEVEL_BUILD = 1; # change for release versions +$VERSION = "4.000000"; # update after release (same format as perl $]) +$IS_DEVEL_BUILD = 1; # change for release versions # Used during the prerelease/release-candidate part of the official release # process. 
If you hacked up your SA, you should add a version_tag to your .cf diff --git lib/Mail/SpamAssassin/ArchiveIterator.pm lib/Mail/SpamAssassin/ArchiveIterator.pm index 66b2d3619..bf3206651 100644 --- lib/Mail/SpamAssassin/ArchiveIterator.pm +++ lib/Mail/SpamAssassin/ArchiveIterator.pm @@ -21,7 +21,7 @@ package Mail::SpamAssassin::ArchiveIterator; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; use Errno qw(ENOENT EACCES EBADF); diff --git lib/Mail/SpamAssassin/AsyncLoop.pm lib/Mail/SpamAssassin/AsyncLoop.pm index 3c9de547f..57a3f688f 100644 --- lib/Mail/SpamAssassin/AsyncLoop.pm +++ lib/Mail/SpamAssassin/AsyncLoop.pm @@ -35,7 +35,7 @@ package Mail::SpamAssassin::AsyncLoop; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; use Time::HiRes qw(time); @@ -257,6 +257,21 @@ filled-in with a query ID. sub bgsend_and_start_lookup { my($self, $domain, $type, $class, $ent, $cb, %options) = @_; + + # At this point the $domain should already be encoded to UTF-8 and + # IDN converted to ASCII-compatible encoding (ACE). Make sure this is + # really the case in order to be able to catch any leftover omissions. + if (utf8::is_utf8($domain)) { + utf8::encode($domain); + my($package, $filename, $line) = caller; + info("bgsend_and_start_lookup: Unicode domain name, expected octets: %s, ". + "called from %s line %d", $domain, $package, $line); + } elsif ($domain =~ tr/\x00-\x7F//c) { # is not all-ASCII + my($package, $filename, $line) = caller; + info("bgsend_and_start_lookup: non-ASCII domain name: %s, ". 
+ "called from %s line %d", $domain, $package, $line); + } + $ent = {} if !$ent; $domain =~ s/\.+\z//s; # strip trailing dots, these sometimes still sneak in $ent->{id} = undef; diff --git lib/Mail/SpamAssassin/AutoWhitelist.pm lib/Mail/SpamAssassin/AutoWhitelist.pm index 7b7d1b4f0..632d8fc28 100644 --- lib/Mail/SpamAssassin/AutoWhitelist.pm +++ lib/Mail/SpamAssassin/AutoWhitelist.pm @@ -43,7 +43,7 @@ package Mail::SpamAssassin::AutoWhitelist; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; use NetAddr::IP 4.000; diff --git lib/Mail/SpamAssassin/Bayes.pm lib/Mail/SpamAssassin/Bayes.pm index 26880779a..32dd9097d 100644 --- lib/Mail/SpamAssassin/Bayes.pm +++ lib/Mail/SpamAssassin/Bayes.pm @@ -34,7 +34,7 @@ package Mail::SpamAssassin::Bayes; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; use Mail::SpamAssassin; diff --git lib/Mail/SpamAssassin/Bayes/CombineChi.pm lib/Mail/SpamAssassin/Bayes/CombineChi.pm index d80e08e97..f14056453 100644 --- lib/Mail/SpamAssassin/Bayes/CombineChi.pm +++ lib/Mail/SpamAssassin/Bayes/CombineChi.pm @@ -30,7 +30,7 @@ package Mail::SpamAssassin::Bayes::Combine; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; use POSIX qw(frexp); diff --git lib/Mail/SpamAssassin/Bayes/CombineNaiveBayes.pm lib/Mail/SpamAssassin/Bayes/CombineNaiveBayes.pm index 01682dd65..f1ed460f2 100644 --- lib/Mail/SpamAssassin/Bayes/CombineNaiveBayes.pm +++ lib/Mail/SpamAssassin/Bayes/CombineNaiveBayes.pm @@ -30,7 +30,7 @@ package Mail::SpamAssassin::Bayes::Combine; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; ########################################################################### diff --git lib/Mail/SpamAssassin/BayesStore.pm lib/Mail/SpamAssassin/BayesStore.pm index 5a63b96a8..afeae84de 100644 --- lib/Mail/SpamAssassin/BayesStore.pm +++ lib/Mail/SpamAssassin/BayesStore.pm @@ -30,7 +30,7 @@ package Mail::SpamAssassin::BayesStore; use strict; use warnings; -use bytes; +# use bytes; use 
re 'taint'; use Mail::SpamAssassin::Logger; diff --git lib/Mail/SpamAssassin/BayesStore/BDB.pm lib/Mail/SpamAssassin/BayesStore/BDB.pm index 2a99ef519..d0927c35a 100644 --- lib/Mail/SpamAssassin/BayesStore/BDB.pm +++ lib/Mail/SpamAssassin/BayesStore/BDB.pm @@ -31,7 +31,7 @@ package Mail::SpamAssassin::BayesStore::BDB; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; use Errno qw(EBADF); #use Data::Dumper; diff --git lib/Mail/SpamAssassin/BayesStore/DBM.pm lib/Mail/SpamAssassin/BayesStore/DBM.pm index 6ff83dec8..74e2f455a 100644 --- lib/Mail/SpamAssassin/BayesStore/DBM.pm +++ lib/Mail/SpamAssassin/BayesStore/DBM.pm @@ -19,7 +19,7 @@ package Mail::SpamAssassin::BayesStore::DBM; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; use Fcntl; diff --git lib/Mail/SpamAssassin/BayesStore/MySQL.pm lib/Mail/SpamAssassin/BayesStore/MySQL.pm index 14a0e1c71..8ad8679b7 100644 --- lib/Mail/SpamAssassin/BayesStore/MySQL.pm +++ lib/Mail/SpamAssassin/BayesStore/MySQL.pm @@ -38,7 +38,7 @@ package Mail::SpamAssassin::BayesStore::MySQL; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; use Mail::SpamAssassin::BayesStore::SQL; diff --git lib/Mail/SpamAssassin/BayesStore/PgSQL.pm lib/Mail/SpamAssassin/BayesStore/PgSQL.pm index 06e8a877d..0ee86fc30 100644 --- lib/Mail/SpamAssassin/BayesStore/PgSQL.pm +++ lib/Mail/SpamAssassin/BayesStore/PgSQL.pm @@ -41,7 +41,7 @@ package Mail::SpamAssassin::BayesStore::PgSQL; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; use Mail::SpamAssassin::BayesStore::SQL; diff --git lib/Mail/SpamAssassin/BayesStore/Redis.pm lib/Mail/SpamAssassin/BayesStore/Redis.pm index 17373771e..52470cc05 100644 --- lib/Mail/SpamAssassin/BayesStore/Redis.pm +++ lib/Mail/SpamAssassin/BayesStore/Redis.pm @@ -118,7 +118,7 @@ package Mail::SpamAssassin::BayesStore::Redis; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; use Errno qw(EBADF); use Mail::SpamAssassin::Util qw(untaint_var); 
diff --git lib/Mail/SpamAssassin/BayesStore/SDBM.pm lib/Mail/SpamAssassin/BayesStore/SDBM.pm index 3577e49ca..9a96c5614 100644 --- lib/Mail/SpamAssassin/BayesStore/SDBM.pm +++ lib/Mail/SpamAssassin/BayesStore/SDBM.pm @@ -19,7 +19,7 @@ package Mail::SpamAssassin::BayesStore::SDBM; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; use Fcntl; diff --git lib/Mail/SpamAssassin/BayesStore/SQL.pm lib/Mail/SpamAssassin/BayesStore/SQL.pm index 88d1a733c..61b3bb7d5 100644 --- lib/Mail/SpamAssassin/BayesStore/SQL.pm +++ lib/Mail/SpamAssassin/BayesStore/SQL.pm @@ -31,7 +31,7 @@ package Mail::SpamAssassin::BayesStore::SQL; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; use Errno qw(EBADF); diff --git lib/Mail/SpamAssassin/Conf.pm lib/Mail/SpamAssassin/Conf.pm index 5dbd69636..85a065209 100644 --- lib/Mail/SpamAssassin/Conf.pm +++ lib/Mail/SpamAssassin/Conf.pm @@ -79,16 +79,15 @@ package Mail::SpamAssassin::Conf; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; -use Mail::SpamAssassin::Util; use Mail::SpamAssassin::NetSet; use Mail::SpamAssassin::Constants qw(:sa :ip); use Mail::SpamAssassin::Conf::Parser; use Mail::SpamAssassin::Logger; use Mail::SpamAssassin::Util::TieOneStringHash; -use Mail::SpamAssassin::Util qw(untaint_var); +use Mail::SpamAssassin::Util qw(untaint_var idn_to_ascii); use File::Spec; use vars qw{ @@ -360,24 +359,24 @@ for the whitelisting rule to fire. The first parameter is a sender's e-mail address to whitelist, and the second is a string to match the relay's rDNS, or its IP address. Matching is case-insensitive. -This second parameter is matched against the TCP-info information field as -provided in a FROM clause of a trace information (i.e. the Received header +This second parameter is matched against a TCP-info information field as +provided in a FROM clause of a trace information (i.e. in a Received header field, see RFC 5321). 
Only the Received header fields inserted by trusted -hosts are considered. This parameter can either be a full hostname, or the -domain component of that hostname, or an IP address in square brackets. -The reverse DNS lookup is done by a MTA, not by SpamAssassin. +hosts are considered. This parameter can either be a full hostname, or a +domain component of that hostname, or an IP address (optionally followed +by a slash and a prefix length) in square brackets. The address prefix +(mask) length with a slash may stand within brackets along with an address, +or may follow the bracketed address. Reverse DNS lookup is done by an MTA, +not by SpamAssassin. -In case of an IPv4 address in brackets, it may be truncated on classful -boundaries to cover whole subnets, e.g. C<[10.1.2.3]>, C<[10.1.2]>, -C<[10.1]>, C<[10]>. CIDR notation is currently not supported, nor is -IPv6. The matching on IP address is mainly provided to cover rare cases -where whitelisting of a sending MTA is desired which does not have a -correct reverse DNS configured. +For backward compatibility as an alternative to a CIDR notation, an IPv4 +address in brackets may be truncated on classful boundaries to cover whole +subnets, e.g. C<[10.1.2.3]>, C<[10.1.2]>, C<[10.1]>, C<[10]>. In other words, if the host that connected to your MX had an IP address 192.0.2.123 that mapped to 'sendinghost.example.org', you should specify -C, or C, or C<[192.0.2.123]> or -C<[192.0.2]> here. +C, or C, or C<[192.0.2.123]>, or +C<[192.0.2.0/24]>, or C<[192.0.2]> here. Note that this requires that C be correct. For simple cases, it will be, but for a complex network you may get better results @@ -390,8 +389,12 @@ result in the generated Received header field according to RFC 5321. e.g. 
whitelist_from_rcvd joe@example.com example.com - whitelist_from_rcvd *@axkit.org sergeant.org + whitelist_from_rcvd *@* mail.example.org whitelist_from_rcvd *@axkit.org [192.0.2.123] + whitelist_from_rcvd *@axkit.org [192.0.2.0/24] + whitelist_from_rcvd *@axkit.org [192.0.2.0]/24 + whitelist_from_rcvd *@axkit.org [2001:db8:1234::/48] + whitelist_from_rcvd *@axkit.org [2001:db8:1234::]/48 =item def_whitelist_from_rcvd addr@lists.sourceforge.net sourceforge.net @@ -2475,7 +2478,7 @@ length to no more than 50 characters. type => $CONF_TYPE_HASH_KEY_VALUE, }); -=item report_charset CHARSET (default: unset) +=item report_charset CHARSET (default: UTF-8) Set the MIME Content-Type charset used for the text/plain report which is attached to spam mail messages. @@ -2484,7 +2487,7 @@ is attached to spam mail messages. push (@cmds, { setting => 'report_charset', - default => '', + default => 'UTF-8', type => $CONF_TYPE_STRING, }); @@ -3473,8 +3476,11 @@ subdomain of the specified zone. =item util_rb_tld tld1 tld2 ... -This option maintains list of valid TLDs in the RegistryBoundaries code. -TLDs include things like com, net, org, etc. +This option maintains a list of valid TLDs in the RegistryBoundaries code. +Top level domains (TLD) include things like com, net, org, xn--p1ai, рф, ... +International domain names may be specified in ASCII-compatible encoding (ACE), +e.g. xn--p1ai, xn--qxam, or with Unicode labels encoded as UTF-8 octets, +e.g. рф, ελ. =cut @@ -3537,7 +3543,7 @@ TLDs include things like com, net, org, etc. xn--wgbh1c xn--wgbl6a xn--xhq521b xn--xkc2al3hye2a xn--xkc2dl3a5ee0h xn--yfro4i67o xn--ygbi2ammx xn--zfr164b xxx xyz yachts yandex ye yokohama youtube yt za zm zone zw - /) { $self->{valid_tlds}{lc $_} = 1; } + /) { $self->{valid_tlds}{idn_to_ascii($_)} = 1 } push (@cmds, { setting => 'util_rb_tld', @@ -3551,7 +3557,7 @@ TLDs include things like com, net, org, etc. 
return $INVALID_VALUE; } foreach (split(/\s+/, $value)) { - $self->{valid_tlds}{lc $_} = 1; + $self->{valid_tlds}{idn_to_ascii($_)} = 1; } dbg("config: added tld list - $value"); } @@ -3560,7 +3566,9 @@ TLDs include things like com, net, org, etc. =item util_rb_2tld 2tld-1.tld 2tld-2.tld ... This option maintains list of valid 2nd-level TLDs in the RegistryBoundaries -code. 2TLDs include things like co.uk, fed.us, etc. +code. 2TLDs include things like co.uk, fed.us, etc. International domain +names may be specified in ASCII-compatible encoding (ACE), or with Unicode +labels encoded as UTF-8 octets. =cut @@ -3731,7 +3739,7 @@ code. 2TLDs include things like co.uk, fed.us, etc. net.ye org.ye ac.za alt.za bourse.za city.za co.za edu.za gov.za law.za mil.za net.za ngo.za nom.za org.za school.za tm.za web.za ac.zm co.zm com.zm edu.zm gov.zm org.zm sch.zm ac.zw co.zw gov.zw org.zw - /) { $self->{two_level_domains}{lc $_} = 1; } + /) { $self->{two_level_domains}{idn_to_ascii($_)} = 1 } push (@cmds, { setting => 'util_rb_2tld', @@ -3745,7 +3753,7 @@ code. 2TLDs include things like co.uk, fed.us, etc. return $INVALID_VALUE; } foreach (split(/\s+/, $value)) { - $self->{two_level_domains}{lc $_} = 1; + $self->{two_level_domains}{idn_to_ascii($_)} = 1; } } }); @@ -3753,7 +3761,9 @@ code. 2TLDs include things like co.uk, fed.us, etc. =item util_rb_3tld 3tld1.some.tld 3tld2.other.tld ... This option maintains list of valid 3rd-level TLDs in the RegistryBoundaries -code. 3TLDs include things like demon.co.uk, plc.co.im, etc. +code. 3TLDs include things like demon.co.uk, plc.co.im, etc. International +domain names may be specified in ASCII-compatible encoding (ACE), or with +Unicode labels encoded as UTF-8 octets. =cut @@ -3762,7 +3772,7 @@ code. 3TLDs include things like demon.co.uk, plc.co.im, etc. # sa-update 20_aux_tlds.cf. 
foreach (qw/ demon.co.uk esc.edu.ar lkd.co.im plc.co.im - /) { $self->{three_level_domains}{lc $_} = 1; } + /) { $self->{three_level_domains}{idn_to_ascii($_)} = 1 } push (@cmds, { setting => 'util_rb_3tld', @@ -3776,7 +3786,7 @@ code. 3TLDs include things like demon.co.uk, plc.co.im, etc. return $INVALID_VALUE; } foreach (split(/\s+/, $value)) { - $self->{three_level_domains}{lc $_} = 1; + $self->{three_level_domains}{idn_to_ascii($_)} = 1; } } }); @@ -3797,9 +3807,9 @@ standard lists supplied by sa-update. unless (!defined $value || $value eq '') { return $INVALID_VALUE; } - $self->{valid_tlds} = (); - $self->{two_level_domains} = (); - $self->{three_level_domains} = (); + undef $self->{valid_tlds}; + undef $self->{two_level_domains}; + undef $self->{three_level_domains}; dbg("config: cleared tld lists"); } }); @@ -4615,12 +4625,12 @@ sub mtime { sub parse_scores_only { my ($self) = @_; - $_[0]->{parser}->parse ($_[1], 1); + $self->{parser}->parse ($_[1], 1); } sub parse_rules { my ($self) = @_; - $_[0]->{parser}->parse ($_[1], 0); + $self->{parser}->parse ($_[1], 0); } ########################################################################### diff --git lib/Mail/SpamAssassin/Conf/LDAP.pm lib/Mail/SpamAssassin/Conf/LDAP.pm index d1b566759..c25740559 100644 --- lib/Mail/SpamAssassin/Conf/LDAP.pm +++ lib/Mail/SpamAssassin/Conf/LDAP.pm @@ -45,7 +45,7 @@ use Mail::SpamAssassin::Logger; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; use vars qw{ diff --git lib/Mail/SpamAssassin/Conf/Parser.pm lib/Mail/SpamAssassin/Conf/Parser.pm index e31c87b5f..0bd20e3e2 100644 --- lib/Mail/SpamAssassin/Conf/Parser.pm +++ lib/Mail/SpamAssassin/Conf/Parser.pm @@ -142,7 +142,7 @@ use Mail::SpamAssassin::NetSet; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; use vars qw{ diff --git lib/Mail/SpamAssassin/Conf/SQL.pm lib/Mail/SpamAssassin/Conf/SQL.pm index b086bc9f9..c68d28bc2 100644 --- lib/Mail/SpamAssassin/Conf/SQL.pm +++ 
lib/Mail/SpamAssassin/Conf/SQL.pm @@ -45,7 +45,7 @@ use Mail::SpamAssassin::Logger; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; use vars qw{ diff --git lib/Mail/SpamAssassin/Constants.pm lib/Mail/SpamAssassin/Constants.pm index b4f64b27b..fd55680d2 100644 --- lib/Mail/SpamAssassin/Constants.pm +++ lib/Mail/SpamAssassin/Constants.pm @@ -253,10 +253,10 @@ use constant LOCALHOST => qr/ # an IP address, in IPv4 format only. # use constant IPV4_ADDRESS => qr/\b - (?:1\d\d|2[0-4]\d|25[0-5]|\d\d|\d)\. - (?:1\d\d|2[0-4]\d|25[0-5]|\d\d|\d)\. - (?:1\d\d|2[0-4]\d|25[0-5]|\d\d|\d)\. - (?:1\d\d|2[0-4]\d|25[0-5]|\d\d|\d) + (?:1\d\d|2[0-4]\d|25[0-5]|[1-9]\d|\d)\. + (?:1\d\d|2[0-4]\d|25[0-5]|[1-9]\d|\d)\. + (?:1\d\d|2[0-4]\d|25[0-5]|[1-9]\d|\d)\. + (?:1\d\d|2[0-4]\d|25[0-5]|[1-9]\d|\d) \b/ox; # --------------------------------------------------------------------------- @@ -267,10 +267,10 @@ use constant IP_ADDRESS => qr/ (?: \b(? qr/ (?:[a-f0-9]{1,4}:){4}:[a-f0-9]{1,4}: ) # and the IPv4 address appended to all of the 12 bytes above - (?:1\d\d|2[0-4]\d|25[0-5]|\d\d|\d)\. - (?:1\d\d|2[0-4]\d|25[0-5]|\d\d|\d)\. - (?:1\d\d|2[0-4]\d|25[0-5]|\d\d|\d)\. - (?:1\d\d|2[0-4]\d|25[0-5]|\d\d|\d) # no \b, we check later + (?:1\d\d|2[0-4]\d|25[0-5]|[1-9]\d|\d)\. + (?:1\d\d|2[0-4]\d|25[0-5]|[1-9]\d|\d)\. + (?:1\d\d|2[0-4]\d|25[0-5]|[1-9]\d|\d)\. 
+ (?:1\d\d|2[0-4]\d|25[0-5]|[1-9]\d|\d) # no \b, we check later | # or (separately) a pure IPv6 address diff --git lib/Mail/SpamAssassin/DBBasedAddrList.pm lib/Mail/SpamAssassin/DBBasedAddrList.pm index 20bcfedbf..69d230287 100644 --- lib/Mail/SpamAssassin/DBBasedAddrList.pm +++ lib/Mail/SpamAssassin/DBBasedAddrList.pm @@ -19,7 +19,7 @@ package Mail::SpamAssassin::DBBasedAddrList; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; use Fcntl; diff --git lib/Mail/SpamAssassin/Dns.pm lib/Mail/SpamAssassin/Dns.pm index 55e1640f8..014ab2bf6 100644 --- lib/Mail/SpamAssassin/Dns.pm +++ lib/Mail/SpamAssassin/Dns.pm @@ -22,14 +22,14 @@ package Mail::SpamAssassin::PerMsgStatus; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; use Mail::SpamAssassin::Conf; use Mail::SpamAssassin::PerMsgStatus; use Mail::SpamAssassin::AsyncLoop; use Mail::SpamAssassin::Constants qw(:ip); -use Mail::SpamAssassin::Util qw(untaint_var am_running_on_windows); +use Mail::SpamAssassin::Util qw(untaint_var am_running_on_windows idn_to_ascii); use File::Spec; use IO::Socket; @@ -101,6 +101,7 @@ BEGIN { sub do_rbl_lookup { my ($self, $rule, $set, $type, $host, $subtest) = @_; + $host = idn_to_ascii($host); $host =~ s/\.\z//s; # strip a redundant trailing dot my $key = "dns:$type:$host"; my $existing_ent = $self->{async}->get_lookup($key); @@ -145,6 +146,7 @@ sub register_rbl_subtest { sub do_dns_lookup { my ($self, $rule, $type, $host) = @_; + $host = idn_to_ascii($host); $host =~ s/\.\z//s; # strip a redundant trailing dot my $key = "dns:$type:$host"; @@ -171,10 +173,11 @@ sub dnsbl_hit { if (substr($rule, 0, 2) eq "__") { # don't bother with meta rules } elsif ($answer->type eq 'TXT') { - # txtdata returns a non- zone-file-format encoded result, unlike rdatastr; + # txtdata returns a non- zone-file-format encoded result, unlike rdstring; # avoid space-separated RDATA fields if possible, # txtdata provides a list of strings in a list context since Net::DNS 0.69 $log 
= join('',$answer->txtdata); + utf8::encode($log) if utf8::is_utf8($log); local $1; $log =~ s{ (?}xgi; } else { # assuming $answer->type eq 'A' @@ -213,14 +216,27 @@ sub dnsbl_hit { sub dnsbl_uri { my ($self, $question, $answer) = @_; - my $qname = $question->qname; - - # txtdata returns a non- zone-file-format encoded result, unlike rdatastr; - # avoid space-separated RDATA fields if possible, - # txtdata provides a list of strings in a list context since Net::DNS 0.69 - # - my $rdatastr = $answer->UNIVERSAL::can('txtdata') ? join('',$answer->txtdata) + my $rdatastr; + if ($answer->UNIVERSAL::can('txtdata')) { + # txtdata returns a non- zone-file-format encoded result, unlike rdstring; + # avoid space-separated RDATA fields if possible, + # txtdata provides a list of strings in a list context since Net::DNS 0.69 + $rdatastr = join('',$answer->txtdata); + } else { + # rdatastr() is historical/undocumented, use rdstring() since Net::DNS 0.69 + $rdatastr = $answer->UNIVERSAL::can('rdstring') ? $answer->rdstring : $answer->rdatastr; + # encoded in a RFC 1035 zone file format (escaped), decode it + $rdatastr =~ s{ \\ ( [0-9]{3} | (?![0-9]{3}) . ) } + { length($1)==3 && $1 <= 255 ? chr($1) : $1 }xgse; + } + # Bug 7236: Net::DNS attempts to decode text strings in a TXT record as + # UTF-8 since version 0.69, which is undesired: octets failing the UTF-8 + # decoding are converted to a Unicode "replacement character" U+FFFD, and + # ASCII text is unnecessarily flagged as perl native characters. 
+ utf8::encode($rdatastr) if utf8::is_utf8($rdatastr); + + my $qname = $question->qname; if (defined $qname && defined $rdatastr) { my $qclass = $question->qclass; my $qtype = $question->qtype; @@ -267,8 +283,13 @@ sub process_dnsbl_result { my $answ_type = $answer->type; # TODO: there are some CNAME returns that might be useful next if ($answ_type ne 'A' && $answ_type ne 'TXT'); - # skip any A record that isn't on 127/8 - next if ($answ_type eq 'A' && $answer->rdatastr !~ /^127\./); + if ($answ_type eq 'A') { + # Net::DNS::RR::A::address() is available since Net::DNS 0.69 + my $ip_address = $answer->UNIVERSAL::can('address') ? $answer->address + : $answer->rdatastr; + # skip any A record that isn't on 127.0.0.0/8 + next if $ip_address !~ /^127\./; + } for my $rule (@{$rules}) { $self->dnsbl_hit($rule, $question, $answer); } @@ -284,12 +305,25 @@ sub process_dnsbl_result { sub process_dnsbl_set { my ($self, $set, $question, $answer) = @_; - # txtdata returns a non- zone-file-format encoded result, unlike rdatastr; - # avoid space-separated RDATA fields if possible, - # txtdata provides a list of strings in a list context since Net::DNS 0.69 - # - my $rdatastr = $answer->UNIVERSAL::can('txtdata') ? join('',$answer->txtdata) + my $rdatastr; + if ($answer->UNIVERSAL::can('txtdata')) { + # txtdata returns a non- zone-file-format encoded result, unlike rdstring; + # avoid space-separated RDATA fields if possible, + # txtdata provides a list of strings in a list context since Net::DNS 0.69 + $rdatastr = join('',$answer->txtdata); + } else { + # rdatastr() is historical/undocumented, use rdstring() since Net::DNS 0.69 + $rdatastr = $answer->UNIVERSAL::can('rdstring') ? $answer->rdstring : $answer->rdatastr; + # encoded in a RFC 1035 zone file format (escaped), decode it + $rdatastr =~ s{ \\ ( [0-9]{3} | (?![0-9]{3}) . ) } + { length($1)==3 && $1 <= 255 ? 
chr($1) : $1 }xgse; + } + # Bug 7236: Net::DNS attempts to decode text strings in a TXT record as + # UTF-8 since version 0.69, which is undesired: octets failing the UTF-8 + # decoding are converted to a Unicode "replacement character" U+FFFD, and + # ASCII text is unnecessarily flagged as perl native characters. + utf8::encode($rdatastr) if utf8::is_utf8($rdatastr); while (my ($subtest, $rule) = each %{ $self->{dnspost}->{$set} }) { next if $self->{tests_already_hit}->{$rule}; diff --git lib/Mail/SpamAssassin/DnsResolver.pm lib/Mail/SpamAssassin/DnsResolver.pm index ce51bee83..826e2cff2 100644 --- lib/Mail/SpamAssassin/DnsResolver.pm +++ lib/Mail/SpamAssassin/DnsResolver.pm @@ -37,7 +37,7 @@ package Mail::SpamAssassin::DnsResolver; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; require 5.008001; # needs utf8::is_utf8() @@ -45,7 +45,7 @@ require 5.008001; # needs utf8::is_utf8() use Mail::SpamAssassin; use Mail::SpamAssassin::Logger; use Mail::SpamAssassin::Constants qw(:ip); -use Mail::SpamAssassin::Util qw(untaint_var decode_dns_question_entry); +use Mail::SpamAssassin::Util qw(untaint_var decode_dns_question_entry idn_to_ascii); use Socket; use Errno qw(EADDRINUSE EACCES); @@ -581,7 +581,7 @@ sub new_dns_packet { # time, $domain, $type, $packet->id); 1; } or do { - # this can if a domain name in a query is invalid, or if a timeout signal + # get here if a domain name in a query is invalid, or if a timeout signal # happened to be trapped by this eval, or if Net::DNS signalled an error my $eval_stat = $@ ne '' ? 
$@ : "errno=$!"; chomp $eval_stat; # resignal if alarm went off @@ -592,6 +592,9 @@ sub new_dns_packet { }; if ($packet) { + # RD flag needs to be set explicitly since Net::DNS 1.01, Bug 7223 + $packet->header->rd(1); + # my $udp_payload_size = $self->{res}->udppacketsize; my $udp_payload_size = $self->{conf}->{dns_options}->{edns}; if ($udp_payload_size && $udp_payload_size > 512) { @@ -671,7 +674,7 @@ be used, like so: my $id = $self->{resolver}->bgsend($domain, $type, undef, sub { my ($reply, $reply_id, $timestamp) = @_; - $self->got_a_reply ($reply, $reply_id); + $self->got_a_reply($reply, $reply_id); }); The callback can ignore the reply as an invalid packet sent to the listening @@ -722,6 +725,37 @@ sub bgsend { ########################################################################### +=item $id = $res->bgread() + +Similar to C. Reads a DNS packet from +a supplied socket, decodes it, and returns a Net::DNS::Packet object +if successful. Dies on error. + +=cut + +sub bgread() { + my ($self) = @_; + my $sock = $self->{sock}; + my $packetsize = $self->{res}->udppacketsize; + $packetsize = 512 if $packetsize < 512; # just in case + my $data = ''; + my $peeraddr = $sock->recv($data, $packetsize+256); # with some size margin for troubleshooting + defined $peeraddr or die "bgread: recv() failed: $!"; + my $peerhost = $sock->peerhost; + $data ne '' or die "bgread: received empty packet from $peerhost"; + dbg("dns: bgread: received %d bytes from %s", length($data), $peerhost); + my($answerpkt, $decoded_length) = Net::DNS::Packet->new(\$data); + $answerpkt or die "bgread: decoding DNS packet failed: $@"; + $answerpkt->answerfrom($peerhost); + if (defined $decoded_length && $decoded_length ne "" && $decoded_length != length($data)) { + warn sprintf("bgread: received a %d bytes packet from %s, decoded %d bytes\n", + length($data), $peerhost, $decoded_length); + } + return $answerpkt; +} + +########################################################################### + 
=item $nfound = $res->poll_responses() See if there are any C reply packets ready, and return @@ -769,13 +803,24 @@ sub poll_responses { $timeout = 0; # next time around collect whatever is available, then exit last if $nfound == 0; - my $packet = $self->{res}->bgread($self->{sock}); + my $packet; + # Bug 7265, use our own bgread() below + # $packet = $self->{res}->bgread($self->{sock}); + eval { + $packet = $self->bgread(); # Bug 7265, use our own bgread() + } or do { + undef $packet; + my $eval_stat = $@ ne '' ? $@ : "errno=$!"; chomp $eval_stat; + # resignal if alarm went off + die $eval_stat if $eval_stat =~ /__alarm__ignore__\(.*\)/s; + info("dns: bad dns reply: %s", $eval_stat); + }; if (!$packet) { - my $dns_err = $self->{res}->errorstring; - # resignal if alarm went off - die "dns (3) $dns_err\n" if $dns_err =~ /__alarm__ignore__\(.*\)/s; - info("dns: bad dns reply: $dns_err"); + # error already reported above +# my $dns_err = $self->{res}->errorstring; +# die "dns (3) $dns_err\n" if $dns_err =~ /__alarm__ignore__\(.*\)/s; +# info("dns: bad dns reply: $dns_err"); } else { my $header = $packet->header; if (!$header) { @@ -861,7 +906,8 @@ Emulates C. This subroutine is a simple synchronous leftover from SpamAssassin version 3.3 and does not participate in packet query caching and callback grouping as implemented by AsyncLoop::bgsend_and_start_lookup(). As such it should -be avoided for mainstream usage. +be avoided for mainstream usage. Currently used through Mail::SPF::Server +by the SPF plugin. =cut @@ -874,8 +920,9 @@ sub send { # using some arbitrary encoding (they are normally just 7-bit ascii # characters anyway, just need to get rid of the utf8 flag). Bug 6959 # Most if not all af these come from a SPF plugin. 
+ # (was a call to utf8::encode($name), now we prefer a proper idn_to_ascii) # - utf8::encode($name); + $name = idn_to_ascii($name); my $retrans = $self->{retrans}; my $retries = $self->{retry}; diff --git lib/Mail/SpamAssassin/Locales.pm lib/Mail/SpamAssassin/Locales.pm index f932c23c5..f3afbb36e 100644 --- lib/Mail/SpamAssassin/Locales.pm +++ lib/Mail/SpamAssassin/Locales.pm @@ -19,7 +19,7 @@ package Mail::SpamAssassin::Locales; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; use vars qw{ diff --git lib/Mail/SpamAssassin/Locker.pm lib/Mail/SpamAssassin/Locker.pm index dd550f94b..c5e0bd6f2 100644 --- lib/Mail/SpamAssassin/Locker.pm +++ lib/Mail/SpamAssassin/Locker.pm @@ -19,7 +19,7 @@ package Mail::SpamAssassin::Locker; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; use Fcntl; use Time::HiRes (); diff --git lib/Mail/SpamAssassin/Locker/Flock.pm lib/Mail/SpamAssassin/Locker/Flock.pm index 39fd7ad19..ddfa1cb62 100644 --- lib/Mail/SpamAssassin/Locker/Flock.pm +++ lib/Mail/SpamAssassin/Locker/Flock.pm @@ -19,7 +19,7 @@ package Mail::SpamAssassin::Locker::Flock; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; use Mail::SpamAssassin; diff --git lib/Mail/SpamAssassin/Locker/UnixNFSSafe.pm lib/Mail/SpamAssassin/Locker/UnixNFSSafe.pm index 516efc2b1..3ea229edd 100644 --- lib/Mail/SpamAssassin/Locker/UnixNFSSafe.pm +++ lib/Mail/SpamAssassin/Locker/UnixNFSSafe.pm @@ -19,7 +19,7 @@ package Mail::SpamAssassin::Locker::UnixNFSSafe; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; use Mail::SpamAssassin; diff --git lib/Mail/SpamAssassin/Locker/Win32.pm lib/Mail/SpamAssassin/Locker/Win32.pm index 1990ba05b..d62db5539 100644 --- lib/Mail/SpamAssassin/Locker/Win32.pm +++ lib/Mail/SpamAssassin/Locker/Win32.pm @@ -19,7 +19,7 @@ package Mail::SpamAssassin::Locker::Win32; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; use Fcntl; diff --git lib/Mail/SpamAssassin/Logger.pm 
lib/Mail/SpamAssassin/Logger.pm index 86b6e7b70..c6118e81d 100644 --- lib/Mail/SpamAssassin/Logger.pm +++ lib/Mail/SpamAssassin/Logger.pm @@ -37,7 +37,7 @@ package Mail::SpamAssassin::Logger; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; BEGIN { diff --git lib/Mail/SpamAssassin/Logger/File.pm lib/Mail/SpamAssassin/Logger/File.pm index a0d39a9ad..97af98901 100644 --- lib/Mail/SpamAssassin/Logger/File.pm +++ lib/Mail/SpamAssassin/Logger/File.pm @@ -31,16 +31,23 @@ package Mail::SpamAssassin::Logger::File; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; use POSIX (); use Time::HiRes (); use Mail::SpamAssassin::Logger; +use Mail::SpamAssassin::Util qw(am_running_on_windows); use vars qw(@ISA); @ISA = (); +# ADDING OS-DEPENDENT LINE TERMINATOR - BUG 6456 +my $eol = "\n"; +if (am_running_on_windows()) { + $eol = "\r\n"; +} + sub new { my $class = shift; @@ -54,7 +61,7 @@ sub new { $self->{timestamp_fmt} = $params{timestamp_fmt}; if (! $self->init()) { - die "logger: file initialization failed\n"; + die "logger: file initialization failed$eol"; } return($self); @@ -75,7 +82,7 @@ sub init { return 1; } else { - warn "logger: failed to open file $self->{filename}: $!\n"; + warn "logger: failed to open file $self->{filename}: $!$eol"; return 0; } } @@ -94,8 +101,8 @@ sub log_message { } $timestamp .= ' ' if $timestamp ne ''; - my($nwrite) = syswrite(STDLOG, sprintf("%s[%s] %s: %s\n", - $timestamp, $$, $level, $msg)); + my($nwrite) = syswrite(STDLOG, sprintf("%s[%s] %s: %s%s", + $timestamp, $$, $level, $msg, $eol)); defined $nwrite or warn "error writing to log file: $!"; } diff --git lib/Mail/SpamAssassin/Logger/Stderr.pm lib/Mail/SpamAssassin/Logger/Stderr.pm index ea7359396..bfadf1175 100644 --- lib/Mail/SpamAssassin/Logger/Stderr.pm +++ lib/Mail/SpamAssassin/Logger/Stderr.pm @@ -31,7 +31,7 @@ package Mail::SpamAssassin::Logger::Stderr; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; use POSIX (); @@ -40,6 
+40,17 @@ use Time::HiRes (); use vars qw(@ISA); @ISA = (); +# ADDING OS-DEPENDENT LINE TERMINATOR - BUG 6456 + +# Using Mail::SpamAssassin::Util::am_running_on_windows() leads to circular +# dependencies. So, we are duplicating the code instead. +use constant RUNNING_ON_WINDOWS => ($^O =~ /^(?:mswin|dos|os2)/oi); + +my $eol = "\n"; +if (RUNNING_ON_WINDOWS) { + $eol = "\r\n"; +} + sub new { my $class = shift; @@ -61,8 +72,9 @@ sub log_message { if (!defined $fmt) { # default since 3.3.0 my $now = Time::HiRes::time; - $timestamp = sprintf("%s:%06.3f", - POSIX::strftime("%b %d %H:%M", localtime($now)), $now-int($now/60)*60); + my $datetime = POSIX::strftime("%b %d %H:%M", localtime($now)); + utf8::encode($datetime) if utf8::is_utf8($datetime); # Bug 7305 + $timestamp = sprintf("%s:%06.3f", $datetime, $now-int($now/60)*60); # Bug 6329: %e is not in a POSIX standard, use %d instead and edit local $1; $timestamp =~ s/^(\S+\s+)0/$1 /; } elsif ($fmt eq '') { @@ -72,8 +84,8 @@ sub log_message { } $timestamp .= ' ' if $timestamp ne ''; - my($nwrite) = syswrite(STDERR, sprintf("%s[%d] %s: %s\n", - $timestamp, $$, $level, $msg)); + my($nwrite) = syswrite(STDERR, sprintf("%s[%d] %s: %s%s", + $timestamp, $$, $level, $msg, $eol)); defined $nwrite or warn "error writing to log file: $!"; } diff --git lib/Mail/SpamAssassin/Logger/Syslog.pm lib/Mail/SpamAssassin/Logger/Syslog.pm index 50d77bb1a..4be908078 100644 --- lib/Mail/SpamAssassin/Logger/Syslog.pm +++ lib/Mail/SpamAssassin/Logger/Syslog.pm @@ -31,7 +31,7 @@ package Mail::SpamAssassin::Logger::Syslog; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; use POSIX qw(:sys_wait_h setsid sigprocmask); diff --git lib/Mail/SpamAssassin/MailingList.pm lib/Mail/SpamAssassin/MailingList.pm index b98cd4c57..4db87d115 100644 --- lib/Mail/SpamAssassin/MailingList.pm +++ lib/Mail/SpamAssassin/MailingList.pm @@ -24,7 +24,7 @@ package Mail::SpamAssassin::PerMsgStatus; use strict; use warnings; -use bytes; +# use bytes; use 
re 'taint'; sub detect_mailing_list { diff --git lib/Mail/SpamAssassin/Message.pm lib/Mail/SpamAssassin/Message.pm index ac8ca673a..406ca6940 100644 --- lib/Mail/SpamAssassin/Message.pm +++ lib/Mail/SpamAssassin/Message.pm @@ -1040,7 +1040,7 @@ sub _parse_normal { # attempt to figure out a name for this attachment if there is one ... my $disp = $msg->header('content-disposition') || ''; - if ($disp =~ /name="?([^\";]+)"?/i) { + if ($disp =~ /name=\s*"?([^";]+)"?/i) { $msg->{'name'} = $1; } elsif ($ct[3]) { @@ -1141,11 +1141,13 @@ sub get_body_text_array_common { } # whitespace handling (warning: small changes have large effects!) - $text =~ s/\n+\s*\n+/\f/gs; # double newlines => form feed + $text =~ s/\n+\s*\n+/\x00/gs; # double newlines => null # $text =~ tr/ \t\n\r\x0b\xa0/ /s; # whitespace (incl. VT, NBSP) => space - $text =~ tr/ \t\n\r\x0b/ /s; # whitespace (incl. VT) => space - $text =~ tr/\f/\n/; # form feeds => newline +# $text =~ tr/ \t\n\r\x0b/ /s; # whitespace (incl. VT) => single space + $text =~ s/\s+/ /gs; # Unicode whitespace => single space + $text =~ tr/\x00/\n/; # null => newline + utf8::encode($text) if utf8::is_utf8($text); my @textary = split_into_array_of_short_lines($text); $self->{$key} = \@textary; diff --git lib/Mail/SpamAssassin/Message/Metadata.pm lib/Mail/SpamAssassin/Message/Metadata.pm index d7989c58f..e32da1f83 100644 --- lib/Mail/SpamAssassin/Message/Metadata.pm +++ lib/Mail/SpamAssassin/Message/Metadata.pm @@ -50,7 +50,7 @@ package Mail::SpamAssassin::Message::Metadata; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; use Mail::SpamAssassin; diff --git lib/Mail/SpamAssassin/Message/Metadata/Received.pm lib/Mail/SpamAssassin/Message/Metadata/Received.pm index b4402a0cb..893c11afb 100644 --- lib/Mail/SpamAssassin/Message/Metadata/Received.pm +++ lib/Mail/SpamAssassin/Message/Metadata/Received.pm @@ -43,7 +43,7 @@ package Mail::SpamAssassin::Message::Metadata::Received; 1; package 
Mail::SpamAssassin::Message::Metadata; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; use Mail::SpamAssassin::Dns; @@ -434,7 +434,8 @@ sub parse_received_line { $auth = 'Postfix'; } # Communigate Pro - Bug 6495 adds HTTP as possible transmission method - elsif (/CommuniGate Pro (HTTP|SMTP)/ && / \(account /) { + # Bug 7277: XIMSS used by Pronto and other custom apps, IMAP supports XMIT extension + elsif (/CommuniGate Pro (HTTP|SMTP|XIMSS|IMAP)/ && / \(account /) { $auth = 'Communigate'; } # Microsoft Exchange (complete with syntax error) @@ -717,6 +718,11 @@ sub parse_received_line { $rdns = $1; $helo = $2; $ip = $3; $by = $4; goto enough; } + # Received: from mail-backend.DDDD.com (LHLO mail-backend.DDDD.com) (10.2.2.20) by mail-backend.DDDD.com with LMTP; Thu, 18 Jun 2015 16:50:56 -0700 (PDT) + if (/^(\S+) \(LHLO (\S*)\) \((${IP_ADDRESS})\) by (\S+) with LMTP/) { + $rdns = $1; $helo = $2; $ip = $3; $by = $4; goto enough; + } + # from dslb-082-083-045-064.pools.arcor-ip.net (EHLO homepc) [82.83.45.64] by mail.gmx.net (mp010) with SMTP; 03 Feb 2007 13:13:47 +0100 if (/^(\S+) \((?:HELO|EHLO) (\S*)\) \[(${IP_ADDRESS})\] by (\S+) \([^\)]+\) with (?:ESMTP|SMTP)/) { $rdns = $1; $helo = $2; $ip = $3; $by = $4; goto enough; diff --git lib/Mail/SpamAssassin/Message/Node.pm lib/Mail/SpamAssassin/Message/Node.pm index f1ad898ed..30d9b43a0 100644 --- lib/Mail/SpamAssassin/Message/Node.pm +++ lib/Mail/SpamAssassin/Message/Node.pm @@ -186,7 +186,7 @@ sub header { $dec_value =~ s/\n[ \t]+/ /gs; $dec_value =~ s/\s+$//s; $dec_value =~ s/^\s+//s; - push @{ $self->{'headers'}->{$key} }, $self->_decode_header($dec_value,$key); + push @{ $self->{'headers'}->{$key} }, _decode_header($dec_value,$key); push @{ $self->{'raw_headers'}->{$key} }, $raw_value; @@ -400,25 +400,23 @@ sub _html_render { # then encode into UTF-8 octets if requested. 
# sub _normalize { - my $self = $_[0]; -# my $data = $_[1]; # avoid copying large strings - my $charset_declared = $_[2]; - my $return_decoded = $_[3]; # true: Unicode characters, false: UTF-8 octets +# my $data = $_[0]; # avoid copying large strings + my $charset_declared = $_[1]; + my $return_decoded = $_[2]; # true: Unicode characters, false: UTF-8 octets + my $insist_on_declared_charset = $_[3]; # no FB_CROAK in Encode::decode - return $_[1] unless $self->{normalize} && $enc_utf8; - - warn "message: _normalize() was given characters, expected bytes: $_[1]\n" - if utf8::is_utf8($_[1]); + warn "message: _normalize() was given characters, expected bytes: $_[0]\n" + if utf8::is_utf8($_[0]); # workaround for Encode::decode taint laundering bug [rt.cpan.org #84879] - my $data_taint = substr($_[1], 0, 0); # empty string, tainted like $data + my $data_taint = substr($_[0], 0, 0); # empty string, tainted like $data if (!defined $charset_declared || $charset_declared eq '') { $charset_declared = 'us-ascii'; } # number of characters with code above 127 - my $cnt_8bits = $_[1] =~ tr/\x00-\x7F//c; + my $cnt_8bits = $_[0] =~ tr/\x00-\x7F//c; if (!$cnt_8bits && $charset_declared =~ @@ -426,7 +424,7 @@ sub _normalize { ISO646-US )\z/xsi) { # declared as US-ASCII (a.k.a. ANSI X3.4-1986) and it really is dbg("message: kept, charset is US-ASCII as declared"); - return $_[1]; # is all-ASCII, no need for decoding + return $_[0]; # is all-ASCII, no need for decoding } if (!$cnt_8bits && @@ -436,31 +434,21 @@ sub _normalize { Big5 | GBK | GB[ -]?18030 (?:-20\d\d)? )\z/xsi) { # declared as extended ASCII, but it is actually a plain 7-bit US-ASCII dbg("message: kept, charset is US-ASCII, declared %s", $charset_declared); - return $_[1]; # is all-ASCII, no need for decoding + return $_[0]; # is all-ASCII, no need for decoding } # Try first to strictly decode based on a declared character set. 
my $rv; - if ($charset_declared =~ /^UTF-?8\z/i) { - # attempt decoding as strict UTF-8 (flags: FB_CROAK | LEAVE_SRC) - if (eval { $rv = $enc_utf8->decode($_[1], 1|8); defined $rv }) { - dbg("message: decoded as declared charset UTF-8"); - return $_[1] if !$return_decoded; - $rv .= $data_taint; # carry taintedness over, avoid Encode bug - return $rv; # decoded - } else { - dbg("message: failed decoding as declared charset UTF-8"); - }; - - } elsif ($cnt_8bits && - eval { $rv = $enc_utf8->decode($_[1], 1|8); defined $rv }) { + if ($cnt_8bits && !$insist_on_declared_charset && + eval { $rv = $enc_utf8->decode($_[0], 1|8); defined $rv }) { dbg("message: decoded as charset UTF-8, declared %s", $charset_declared); - return $_[1] if !$return_decoded; + return $_[0] if !$return_decoded; $rv .= $data_taint; # carry taintedness over, avoid Encode bug return $rv; # decoded - } elsif ($charset_declared =~ /^(?:US-)?ASCII\z/i) { + } elsif ($charset_declared =~ /^(?:US-)?ASCII\z/i + && !$insist_on_declared_charset) { # declared as US-ASCII but contains 8-bit characters, makes no sense # to attempt decoding first as strict US-ASCII as we know it would fail @@ -484,8 +472,11 @@ sub _normalize { my($chset, $decoder); if ($charset_declared =~ /^(?: ISO-?8859-1 | Windows-1252 | CP1252 )\z/xi) { $chset = 'Windows-1252'; $decoder = $enc_w1252; + } elsif ($charset_declared =~ /^UTF-?8\z/i) { + $chset = 'UTF-8'; $decoder = $enc_utf8; } else { - $chset = $charset_declared; $decoder = Encode::find_encoding($chset); + $chset = $charset_declared; + $decoder = Encode::find_encoding($chset); if (!$decoder && $chset =~ /^GB[ -]?18030(?:-20\d\d)?\z/i) { $decoder = Encode::find_encoding('GBK'); # a subset of GB18030 dbg("message: no decoder for a declared charset %s, using GBK", @@ -496,7 +487,9 @@ sub _normalize { dbg("message: failed decoding, no decoder for a declared charset %s", $chset); } else { - eval { $rv = $decoder->decode($_[1], 1|8) }; # FB_CROAK | LEAVE_SRC + my $check_flags = 
Encode::LEAVE_SRC; # 0x0008 + $check_flags |= Encode::FB_CROAK unless $insist_on_declared_charset; + eval { $rv = $decoder->decode($_[0], $check_flags) }; if (lc $chset eq lc $charset_declared) { dbg("message: %s as declared charset %s", defined $rv ? 'decoded' : 'failed decoding', $charset_declared); @@ -517,15 +510,15 @@ sub _normalize { if (!defined $rv && !$cnt_8bits) { dbg("message: kept, guessed charset is US-ASCII, declared %s", $charset_declared); - return $_[1]; # is all-ASCII, no need for decoding + return $_[0]; # is all-ASCII, no need for decoding } elsif (!defined $rv && $enc_w1252 && # ASCII NBSP (c) SHY ' " ... '".- TM - $_[1] !~ tr/\x00-\x7F\xA0\xA9\xAD\x82\x84\x85\x91-\x97\x99//c) + $_[0] !~ tr/\x00-\x7F\xA0\xA9\xAD\x82\x84\x85\x91-\x97\x99//c) { # ASCII + NBSP + SHY + some punctuation characters # NBSP (A0) and SHY (AD) are at the same position in ISO-8859-* too # consider also: AE (r), 80 Euro - eval { $rv = $enc_w1252->decode($_[1], 1|8) }; # FB_CROAK | LEAVE_SRC + eval { $rv = $enc_w1252->decode($_[0], 1|8) }; # FB_CROAK | LEAVE_SRC # the above can't fail, but keep code general just in case dbg("message: %s as guessed charset %s, declared %s", defined $rv ? 
'decoded' : 'failed decoding', @@ -541,7 +534,7 @@ sub _normalize { dbg("message: Encode::Detect::Detector not available, declared %s failed", $charset_declared); } else { - my $charset_detected = Encode::Detect::Detector::detect($_[1]); + my $charset_detected = Encode::Detect::Detector::detect($_[0]); if ($charset_detected && lc $charset_detected ne lc $charset_declared) { my $decoder = Encode::find_encoding($charset_detected); if (!$decoder && $charset_detected =~ /^GB[ -]?18030(?:-20\d\d)?\z/i) { @@ -553,7 +546,7 @@ sub _normalize { dbg("message: failed decoding, no decoder for a detected charset %s", $charset_detected); } else { - eval { $rv = $decoder->decode($_[1], 1|8) }; # FB_CROAK | LEAVE_SRC + eval { $rv = $decoder->decode($_[0], 1|8) }; # FB_CROAK | LEAVE_SRC dbg("message: %s as detected charset %s, declared %s", defined $rv ? 'decoded' : 'failed decoding', $charset_detected, $charset_declared); @@ -563,14 +556,14 @@ sub _normalize { if (!defined $rv) { # all decoding attempts failed so far, probably garbage # go for Windows-1252 which can't fail - eval { $rv = $enc_w1252->decode($_[1]) }; + eval { $rv = $enc_w1252->decode($_[0]) }; dbg("message: %s as last-resort charset %s, declared %s", defined $rv ? 'decoded' : 'failed decoding', 'Windows-1252', $charset_declared); } if (!defined $rv) { # just in case - all decoding attempts failed so far - return $_[1]; # garbage-in / garbage-out, return unchanged octets + return $_[0]; # garbage-in / garbage-out, return unchanged octets } # decoding octets to characters was successful if (!$return_decoded) { @@ -618,25 +611,34 @@ sub rendered { # Provide input to HTML::Parser as Unicode characters # which avoids a HTML::Parser bug in utf8_mode # https://rt.cpan.org/Public/Bug/Display.html?id=99755 + # Note: the above bug was fixed in HTML-Parser 3.72, January 2016. # Avoid unnecessary step of encoding-then-decoding by telling # subroutine _normalize() to return Unicode text. 
See Bug 7133 # $character_semantics = 1; # $text will be in characters - $text = $self->_normalize($text, $self->{charset}, 1); # bytes to chars + $text = _normalize($text, $self->{charset}, 1); # bytes to chars } elsif (!defined $self->{charset} || $self->{charset} =~ /^(?:US-ASCII|UTF-8)\z/i) { - # With some luck input can be interpreted as UTF-8, do not warn. - # It is still possible to hit the HTML::Parses utf8_mode bug however. + if ($text !~ tr/\x00-\x7F//c) { + # all-ASCII, keep as octets (utf8 flag off) + } else { # non-ASCII, try UTF-8 + my $rv; + # with some luck input can be interpreted as UTF-8 + if (eval { $rv = $enc_utf8->decode($text, 1|8); defined $rv }) { + $text = $rv; # decoded to perl characters + $character_semantics = 1; # $text will be in characters + }; + } } else { dbg("message: 'normalize_charset' is off, encoding will likely ". "be misinterpreted; declared charset: %s", $self->{charset}); } - # the 0 requires decoded HTML results to be in bytes (not characters) - my $html = Mail::SpamAssassin::HTML->new($character_semantics,0); # object + # the 1 requires decoded HTML results to be in characters (utf8 flag on) + my $html = Mail::SpamAssassin::HTML->new($character_semantics,1); # object $html->parse($text); # parse+render text - # resulting HTML-decoded text is in bytes, likely encoded as UTF-8 + # resulting HTML-decoded text is in perl characters (utf8 flag on) $self->{rendered} = $html->get_rendered_text(); $self->{visible_rendered} = $html->get_rendered_text(invisible => 0); $self->{invisible_rendered} = $html->get_rendered_text(invisible => 1); @@ -645,11 +647,25 @@ sub rendered { # end-of-document result values that require looking at the text my $r = $self->{html_results}; # temporary reference for brevity - # count the number of spaces in the rendered text (likely UTF-8 octets) - my $space = $self->{rendered} =~ tr/ \t\n\r\x0b//; + # count the number of spaces in the rendered text + my $space; + if (utf8::is_utf8($self->{rendered})) 
{ + my $str = $self->{rendered}; + $str =~ s/\S+//g; # delete non-whitespace Unicode characters + $space = length $str; # count remaining Unicode space characters + undef $str; # deallocate storage + dbg("message: spaces (Unicode) in HTML: %d out of %d%s", + $space, length $self->{rendered}, + $character_semantics ? '' : ', octets!?'); + } else { + $space = $self->{rendered} =~ tr/ \t\n\r\x0b//; + dbg("message: spaces (octets) in HTML: %d out of %d%s", + $space, length $self->{rendered}, + $character_semantics ? ', chars!?' : ''); + } # we may want to add the count of other Unicode whitespace characters - $r->{html_length} = length $self->{rendered}; # bytes (likely UTF-8) + $r->{html_length} = length $self->{rendered}; # perl characters count $r->{non_space_len} = $r->{html_length} - $space; $r->{ratio} = ($text_len - $r->{html_length}) / $text_len if $text_len; } @@ -657,7 +673,16 @@ sub rendered { else { # plain text if ($self->{normalize} && $enc_utf8) { # request transcoded result as UTF-8 octets! - $text = $self->_normalize($text, $self->{charset}, 0); + $text = _normalize($text, $self->{charset}, 1); # bytes to chars + } elsif (!defined $self->{charset} || + $self->{charset} =~ /^(?:US-ASCII|UTF-8)\z/i) { + if ($text =~ tr/\x00-\x7F//c) { # non-ASCII, try UTF-8 + my $rv; + # with some luck input can be interpreted as UTF-8 + if (eval { $rv = $enc_utf8->decode($text, 1|8); defined $rv }) { + $text = $rv; # decoded to perl characters + }; + } } $self->{rendered_type} = $self->{type}; $self->{rendered} = $self->{'visible_rendered'} = $text; @@ -760,15 +785,15 @@ sub delete_header { $self->{'header_order'} = \@neworder; } -# decode a header appropriately. don't bother adding it to the pod documents. 
-sub __decode_header { - my ( $self, $encoding, $cte, $data ) = @_; +# decode 'encoded-word' (RFC 2047, RFC 2231) +sub _decode_mime_encoded_word { + my ( $encoding, $cte, $data ) = @_; - if ( $cte eq 'B' ) { + if ( uc $cte eq 'B' ) { # base 64 encoded $data = Mail::SpamAssassin::Util::base64_decode($data); } - elsif ( $cte eq 'Q' ) { + elsif ( uc $cte eq 'Q' ) { # quoted printable # the RFC states that in the encoded text, "_" is equal to "=20" @@ -778,48 +803,111 @@ sub __decode_header { } else { # not possible since the input has already been limited to 'B' and 'Q' - die "message: unknown encoding type '$cte' in RFC2047 header"; + die "message: unknown encoding type '$cte' in RFC 2047 header"; + } + + if (defined $encoding) { + # RFC 2231 section 5: Language specification in Encoded Words + # =?US-ASCII*EN?Q?Keith_Moore?= + # strip optional language information following an asterisk + $encoding =~ s{ \* .* \z }{}xs; + + $data = _normalize($data, $encoding, 0, 1); # transcode to UTF-8 octets } - return $self->_normalize($data, $encoding, 0); # transcode to UTF-8 octets + # dbg("message: _decode_mime_encoded_word (%s, %s): %s", + # $cte, $encoding || '-', $data); + + return $data; # as UTF-8 octets } -# Decode base64 and quoted-printable in headers according to RFC2047. +# Decode base64 and quoted-printable in headers according to RFC 2047. 
# sub _decode_header { - my($self, $header_field_body, $header_field_name) = @_; + my($header_field_body, $header_field_name) = @_; return '' unless defined $header_field_body && $header_field_body ne ''; # deal with folding and cream the newlines and such - $header_field_body =~ s/\n[ \t]+/\n /g; + $header_field_body =~ s/\n[ \t]/\n /g; # turning tab into space on folds $header_field_body =~ s/\015?\012//gs; + if ($header_field_body =~ tr/\x00-\x7F//c) { + # Non-ASCII characters in header are not allowed by RFC 5322, but + # RFC 6532 relaxed the rule and allows UTF-8 encoding in header + # field bodies; no other encoding is allowed there (apart from + # RFC 2047 MIME encoded words, which must be all-ASCII anyway). + # The following call keeps UTF-8 octets if valid, otherwise tries + # some decoding guesswork so that the result is valid UTF-8 (octets). + $header_field_body = _normalize($header_field_body, 'UTF-8', 0); + } + if ($header_field_name =~ - /^ (?: (?: Received | (?:Resent-)? (?: Message-ID | Date ) | - MIME-Version | References | In-Reply-To ) \z - | (?: List- | Content- ) ) /xsi ) { + /^ (?: Received | (?:Resent-)? (?: Message-ID | Date ) | + MIME-Version | References | In-Reply-To | List-.* ) \z /xsi ) { # Bug 6945: some header fields must not be processed for MIME encoding + # Bug 7249: leave out the Content-* - } else { - local($1,$2,$3); + } elsif ($header_field_body =~ /=\?/) { # triage for possible encoded-words + local($1,$2,$3,$4); # Multiple encoded sections must ignore the interim whitespace. # To avoid possible FPs with (\s+(?==\?))?, look for the whole RE # separated by whitespace. - 1 while $header_field_body =~ - s{ ( = \? [A-Za-z0-9_-]+ \? [bqBQ] \? [^?]* \? = ) \s+ - ( = \? [A-Za-z0-9_-]+ \? [bqBQ] \? [^?]* \? = ) } - {$1$2}xsg; - - # transcode properly encoded RFC 2047 substrings into UTF-8 octets, - # leave everything else unchanged as it is supposed to be UTF-8 (RFC 6532) - # or plain US-ASCII $header_field_body =~ - s{ (?: = \? 
([A-Za-z0-9_-]+) \? ([bqBQ]) \? ([^?]*) \? = ) } - { $self->__decode_header($1, uc($2), $3) }xsge; + s{ ( = \? [A-Za-z0-9*_-]+ \? [bqBQ] \? [^?]* \? = ) \s+ + (?= = \? [A-Za-z0-9*_-]+ \? [bqBQ] \? [^?]* \? = ) }{$1}xsg; + + # Bug 7249: work around violations of the RFC 2047 section 5 requirement: + # Each 'encoded-word' MUST represent an integral number of characters. + # A multi-octet character may not be split across adjacent 'encoded-word's + # Unfortunately such violations are not uncommon. + # + # Bug 7307: to deal with the above, base64/QP decoding must be decoupled + # from decoding a specified multi-byte character set into UTF-8. + # A previous simpler code could not handle base64 fill bits correctly + # (merging of adjacent encoded sections before base64/QP decoding them). + + my @sections; # array of pairs: [string, encoding] + my $last_encoding = ''; + while ( $header_field_body =~ + m{ \G = \? ([A-Za-z0-9*_-]+) \? ([bqBQ]) \? ([^?]*) \? = + | ( [^=]+ | . ) }xsg ) { + my($encoding, $str); + if (defined $1) { # we have an encoded section + $encoding = lc $1; + # decode base64 / QP decoding, remember encoding charset + $str = _decode_mime_encoded_word(undef, $2, $3); + } else { # non-encoded text + $encoding = ''; + $str = $4; + } + if ($encoding eq $last_encoding && @sections) { + # merge sections with same encoding - in violation of RFC 2047 sect.5 + $sections[$#sections]->[0] .= $str; + } else { + push(@sections, [$str, $encoding]); + } + $last_encoding = $encoding; + } + + # transcode encoded RFC 2047 substrings (already base64/QP-decoded) + # into UTF-8 octets, leave everything else unchanged as it is supposed + # to be UTF-8 (RFC 6532) or its plain US-ASCII subset (RFC 5322); + # + my $decoded_result = ''; + for my $sect (@sections) { + my $encoding = $sect->[1]; + # RFC 2231 section 5: Language specification in Encoded Words + # =?US-ASCII*EN?Q?Keith_Moore?= + # strip optional language information following an asterisk + $encoding =~ s{ \* .* \z 
}{}xs; + $decoded_result .= + $encoding eq '' ? $sect->[0] : _normalize($sect->[0], $encoding, 0, 1); + } + $header_field_body = $decoded_result; } -# dbg("message: _decode_header %s: %s", $header_field_name, $header_field_body); + dbg("message: _decode_header %s: %s", $header_field_name, $header_field_body); return $header_field_body; } diff --git lib/Mail/SpamAssassin/NetSet.pm lib/Mail/SpamAssassin/NetSet.pm index 2fe51300d..12273b37f 100644 --- lib/Mail/SpamAssassin/NetSet.pm +++ lib/Mail/SpamAssassin/NetSet.pm @@ -20,7 +20,7 @@ package Mail::SpamAssassin::NetSet; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; use Time::HiRes qw(time); use NetAddr::IP 4.000; diff --git lib/Mail/SpamAssassin/PerMsgLearner.pm lib/Mail/SpamAssassin/PerMsgLearner.pm index a2835c585..0cd6ccc67 100644 --- lib/Mail/SpamAssassin/PerMsgLearner.pm +++ lib/Mail/SpamAssassin/PerMsgLearner.pm @@ -48,7 +48,7 @@ package Mail::SpamAssassin::PerMsgLearner; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; use Mail::SpamAssassin; diff --git lib/Mail/SpamAssassin/PerMsgStatus.pm lib/Mail/SpamAssassin/PerMsgStatus.pm index 6d8beaabf..f4c045d15 100644 --- lib/Mail/SpamAssassin/PerMsgStatus.pm +++ lib/Mail/SpamAssassin/PerMsgStatus.pm @@ -55,11 +55,13 @@ use re 'taint'; use Errno qw(ENOENT); use Time::HiRes qw(time); +use Encode; use Mail::SpamAssassin::Constants qw(:sa); use Mail::SpamAssassin::AsyncLoop; use Mail::SpamAssassin::Conf; -use Mail::SpamAssassin::Util qw(untaint_var uri_list_canonicalize); +use Mail::SpamAssassin::Util qw(untaint_var base64_encode idn_to_ascii + uri_list_canonicalize); use Mail::SpamAssassin::Timeout; use Mail::SpamAssassin::Logger; @@ -190,26 +192,21 @@ BEGIN { TESTS => sub { my $pms = shift; my $arg = (shift || ','); - join($arg, sort(@{$pms->{test_names_hit}})) || "none"; + join($arg, sort @{$pms->{test_names_hit}}) || "none"; }, SUBTESTS => sub { my $pms = shift; my $arg = (shift || ','); - join($arg, 
sort(@{$pms->{subtest_names_hit}})) || "none"; + join($arg, sort @{$pms->{subtest_names_hit}}) || "none"; }, TESTSSCORES => sub { my $pms = shift; my $arg = (shift || ","); - my $line = ''; - foreach my $test (sort @{$pms->{test_names_hit}}) { - my $score = $pms->{conf}->{scores}->{$test}; - $score = '0' if !defined $score; - $line .= $arg if $line ne ''; - $line .= $test . "=" . $score; - } - $line ne '' ? $line : 'none'; + my $scores = $pms->{conf}->{scores}; + join($arg, map($_ . "=" . ($scores->{$_} || '0'), + sort @{$pms->{test_names_hit}})) || "none"; }, PREVIEW => sub { @@ -725,7 +722,7 @@ of the tests which were triggered by the mail. sub get_names_of_tests_hit { my ($self) = @_; - return join(',', sort(@{$self->{test_names_hit}})); + return join(',', sort @{$self->{test_names_hit}}); } =item $list = $status->get_names_of_tests_hit_with_scores_hash () @@ -738,16 +735,10 @@ test names and individual scores of the tests which were triggered by the mail. sub get_names_of_tests_hit_with_scores_hash { my ($self) = @_; - my ($line, %testsscores); - - #BASED ON CODE FOR TESTSSCORES TAG - KAM 2014-04-24 - foreach my $test (@{$self->{test_names_hit}}) { - my $score = $self->{conf}->{scores}->{$test}; - $score = '0' if !defined $score; - - $testsscores{$test} = $score; - } - + #BASED ON CODE FOR TESTSSCORES TAG + my $scores = $self->{conf}->{scores}; + my %testsscores; + $testsscores{$_} = $scores->{$_} || '0' for @{$self->{test_names_hit}}; return \%testsscores; } @@ -761,19 +752,10 @@ test names and individual scores of the tests which were triggered by the mail. sub get_names_of_tests_hit_with_scores { my ($self) = @_; - my ($line, %testsscores); - - #BASED ON CODE FOR TESTSSCORES TAG - KAM 2014-04-24 - foreach my $test (sort @{$self->{test_names_hit}}) { - my $score = $self->{conf}->{scores}->{$test}; - $score = '0' if !defined $score; - $line .= ',' if $line ne ''; - $line .= $test . '=' . 
$score; - } - - $line ||= 'none'; - - return $line; + #BASED ON CODE FOR TESTSSCORES TAG + my $scores = $self->{conf}->{scores}; + return join(',', map($_ . '=' . ($scores->{$_} || '0'), + sort @{$self->{test_names_hit}})) || "none"; } @@ -792,7 +774,7 @@ underscores, used in meta rules. sub get_names_of_subtests_hit { my ($self) = @_; - return join(',', sort(@{$self->{subtest_names_hit}})); + return join(',', sort @{$self->{subtest_names_hit}}); } ########################################################################### @@ -914,16 +896,16 @@ sub get_content_preview { $str .= shift @{$ary}; } undef $ary; - chomp ($str); $str .= " [...]\n"; # in case the last line was huge, trim it back to around 200 chars local $1; - $str =~ s/^(.{,200}).*$/$1/gs; + $str =~ s/^(.{200,}).+$/$1 [...]/gm; + chomp ($str); $str .= "\n"; # now, some tidy-ups that make things look a bit prettier - $str =~ s/-----Original Message-----.*$//gs; + $str =~ s/-----Original Message-----.*$//gm; $str =~ s/This is a multi-part message in MIME format\.//gs; - $str =~ s/[-_\*\.]{10,}//gs; + $str =~ s/[-_*.]{10,}//gs; $str =~ s/\s+/ /gs; # add "Content preview:" ourselves, so that the text aligns @@ -1024,7 +1006,7 @@ sub _get_added_headers { foreach my $hf_ref (@{$self->{conf}->{$which}}) { my($hfname, $hfbody) = @$hf_ref; my $line = $self->_process_header($hfname,$hfbody); - $line = $self->qp_encode_header($line); + $line = $self->mime_encode_header($line); $str .= "X-Spam-$hfname: $line\n"; } return $str; @@ -1044,21 +1026,23 @@ sub rewrite_report_safe { # This is the new message. my $newmsg = ''; - # the report charset - my $report_charset = "; charset=iso-8859-1"; - if ($self->{conf}->{report_charset}) { - $report_charset = "; charset=" . 
$self->{conf}->{report_charset}; - } + # the character set of a report + my $report_charset = $self->{conf}->{report_charset} || "UTF-8"; # the SpamAssassin report my $report = $self->get_report(); - # If there are any wide characters, need to MIME-encode in UTF-8 - # TODO: If $report_charset is something other than iso-8859-1/us-ascii, then - # we could try converting to that charset if possible - unless ($] < 5.008 || utf8::downgrade($report, 1)) { - $report_charset = "; charset=utf-8"; - utf8::encode($report); + if (!utf8::is_utf8($report)) { + # already in octets + } else { + # encode to octets + if (uc $report_charset eq 'UTF-8') { + dbg("check: encoding report to $report_charset"); + utf8::encode($report); # very fast + } else { + dbg("check: encoding report to $report_charset. Slow, to be avoided!"); + $report = Encode::encode($report_charset, $report); # slow + } } # get original headers, "pristine" if we can do it @@ -1171,7 +1155,7 @@ Content-Type: multipart/mixed; boundary="$boundary" This is a multi-part message in MIME format. 
--$boundary -Content-Type: text/plain$report_charset +Content-Type: text/plain; charset=$report_charset Content-Disposition: inline Content-Transfer-Encoding: 8bit @@ -1286,35 +1270,59 @@ sub rewrite_no_report_safe { return $newmsg.$self->{msg}->get_pristine_body(); } -sub qp_encode_header { +# encode a header field body into ASCII as per RFC 2047 +# +sub mime_encode_header { my ($self, $text) = @_; - # do nothing unless there's an 8-bit char - return $text unless ($text =~ /[\x80-\xff]/); + utf8::encode($text) if utf8::is_utf8($text); - my $cs = 'ISO-8859-1'; - if ($self->{report_charset}) { - $cs = $self->{report_charset}; - } + my $result = ''; + for my $line (split(/^/, $text)) { - my @hexchars = split('', '0123456789abcdef'); - my $ord; - local $1; - $text =~ s{([\x80-\xff])}{ - $ord = ord $1; - '='.$hexchars[($ord & 0xf0) >> 4].$hexchars[$ord & 0x0f] - }ges; + if ($line =~ /^[\x09\x20-\x7E]*\r?\n\z/s) { + $result .= $line; # no need for encoding - $text = '=?'.$cs.'?Q?'.$text.'?='; + } else { + my $prefix = ''; + my $suffix = ''; - dbg("markup: encoding header in $cs: $text"); - return $text; + local $1; + if ($line =~ s/( (?: ^ | [ \t] ) [\x09\x20-\x7E]* (?: \r?\n )? ) \z//xs) { + $suffix = $1; + } elsif ($line =~ s/(\r?\n)\z//s) { + $suffix = $1; + } + + if ($line =~ s/^ ( [\x09\x20-\x7E]* (?: [ \t] | \z ) )//xs) { + $prefix = $1; + } + + if ($line eq '') { + $result .= $prefix . $suffix; + } else { + my $qp_enc_count = $line =~ tr/=?_\x00-\x1F\x7F-\xFF//; + if (length($line) + $qp_enc_count*2 <= 4 * int(length($line)+2)/3) { + # RFC 2047: Upper case should be used for hex digits A through F + $line =~ s{ ( [=?_\x00-\x20\x7F-\xFF] ) } + { $1 eq ' ' ? '_' : sprintf("=%02X", ord $1) }xges; + $result .= $prefix . '=?UTF-8?Q?' . $line; + } else { + $result .= $prefix . '=?UTF-8?B?' . base64_encode($line); + } + $result .= '?=' . 
$suffix; + } + } + } + + dbg("markup: mime_encode_header: %s", $result); + return $result; } sub _process_header { my ($self, $hdr_name, $hdr_data) = @_; - $hdr_data = $self->_replace_tags($hdr_data); + $hdr_data = $self->_replace_tags($hdr_data); # as octets $hdr_data =~ s/(?:\r?\n)+$//; # make sure there are no trailing newlines ... if ($self->{conf}->{fold_headers}) { @@ -1352,7 +1360,13 @@ sub _replace_tags { # _get_tag on an attempt to use such tag in add_header template } else { $result = $self->get_tag_raw($tag,$3); - $result = join(' ',@$result) if ref $result eq 'ARRAY'; + if (!ref $result) { + utf8::encode($result) if utf8::is_utf8($result); + } elsif (ref $result eq 'ARRAY') { + my @values = @$result; # avoid modifying referenced array + for (@values) { utf8::encode($_) if utf8::is_utf8($_) } + $result = join(' ', @values); + } } defined $result ? $result : $full; }ge; @@ -1714,23 +1728,28 @@ sub extract_message_metadata { $self->{$item} = $self->{msg}->{metadata}->{$item}; } - # TODO: International domain names (UTF-8) must be converted to - # ASCII-compatible encoding (ACE) for the purpose of setting the - # SENDERDOMAIN and AUTHORDOMAIN tags (and probably for other uses too). - # (explicitly required for DMARC, draft-kucherawy-dmarc-base sect. 5.6.1) + # International domain names (UTF-8) must be converted to ASCII-compatible + # encoding (ACE) for the purpose of setting the SENDERDOMAIN and AUTHORDOMAIN + # tags (explicitly required for DMARC, RFC 7489) # { local $1; my $addr = $self->get('EnvelopeFrom:addr', undef); # collect a FQDN, ignoring potential trailing WSP if (defined $addr && $addr =~ /\@([^@. 
\t]+\.[^@ \t]+?)[ \t]*\z/s) { - $self->set_tag('SENDERDOMAIN', lc $1); + my $d = idn_to_ascii($1); + $self->set_tag('SENDERDOMAIN', $d); + $self->{msg}->put_metadata("X-SenderDomain", $d); + dbg("metadata: X-SenderDomain: %s", $d); } # TODO: the get ':addr' only returns the first address; this should be # augmented to be able to return all addresses in a header field, multiple # addresses in a From header field are allowed according to RFC 5322 $addr = $self->get('From:addr', undef); if (defined $addr && $addr =~ /\@([^@. \t]+\.[^@ \t]+?)[ \t]*\z/s) { - $self->set_tag('AUTHORDOMAIN', lc $1); + my $d = idn_to_ascii($1); + $self->set_tag('AUTHORDOMAIN', $d); + $self->{msg}->put_metadata("X-AuthorDomain", $d); + dbg("metadata: X-AuthorDomain: %s", $d); } } @@ -1980,7 +1999,8 @@ sub _get { else { my @results = $getraw ? $self->{msg}->raw_header($request) : $self->{msg}->get_header($request); - # dbg("message: get(%s) = %s", $request, join(", ",@results)); + # dbg("message: get(%s)%s = %s", + # $request, $getraw?'raw':'', join(", ",@results)); if (@results) { $result = join('', @results); } else { # metadata @@ -3073,24 +3093,25 @@ sub all_from_addrs_domains { #TEST POINT - my @addrs = ("test.voipquotes2.net","test.voipquotes2.co.uk"); #Start with all the normal from addrs - my @addrs = &all_from_addrs($self); + my @addrs = all_from_addrs($self); dbg("eval: all '*From' addrs domains (before): " . 
join(" ", @addrs)); - #loop through and limit to just the domain with a dummy address - for (my $i = 0; $i < scalar(@addrs); $i++) { - $addrs[$i] = 'dummy@'.$self->{main}->{registryboundaries}->uri_to_domain($addrs[$i]); + #Take just the domain with a dummy localpart + #removing invalid and duplicate domains + my(%addrs_seen, @addrs_filtered); + foreach my $a (@addrs) { + my $domain = $self->{main}->{registryboundaries}->uri_to_domain($a); + next if !defined $domain || $addrs_seen{lc $domain}++; + push(@addrs_filtered, 'dummy@'.$domain); } - #Remove duplicate domains - my %addrs = map { $_ => 1 } @addrs; - @addrs = keys %addrs; + dbg("eval: all '*From' addrs domains (after uri to domain): " . + join(" ", @addrs_filtered)); - dbg("eval: all '*From' addrs domains (after uri to domain): " . join(" ", @addrs)); + $self->{all_from_addrs_domains} = \@addrs_filtered; - $self->{all_from_addrs_domains} = \@addrs; - - return @addrs; + return @addrs_filtered; } sub all_to_addrs { diff --git lib/Mail/SpamAssassin/PersistentAddrList.pm lib/Mail/SpamAssassin/PersistentAddrList.pm index e2f112079..222632b93 100644 --- lib/Mail/SpamAssassin/PersistentAddrList.pm +++ lib/Mail/SpamAssassin/PersistentAddrList.pm @@ -48,7 +48,7 @@ package Mail::SpamAssassin::PersistentAddrList; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; use vars qw{ diff --git lib/Mail/SpamAssassin/Plugin.pm lib/Mail/SpamAssassin/Plugin.pm index 2893dee18..a49013f2d 100644 --- lib/Mail/SpamAssassin/Plugin.pm +++ lib/Mail/SpamAssassin/Plugin.pm @@ -99,7 +99,7 @@ use Mail::SpamAssassin::Logger; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; use vars qw{ diff --git lib/Mail/SpamAssassin/Plugin/ASN.pm lib/Mail/SpamAssassin/Plugin/ASN.pm index 3b406b00f..a5143fd12 100644 --- lib/Mail/SpamAssassin/Plugin/ASN.pm +++ lib/Mail/SpamAssassin/Plugin/ASN.pm @@ -355,8 +355,10 @@ sub process_dns_result { foreach my $rr (@answer) { dbg("asn: %s: lookup result packet: %s", $zone, 
$rr->string); next if $rr->type ne 'TXT'; - my @strings = $rr->char_str_list; + my @strings = Net::DNS->VERSION >= 0.69 ? $rr->txtdata + : $rr->char_str_list; next if !@strings; + for (@strings) { utf8::encode($_) if utf8::is_utf8($_) } my @items; if (@strings > 1 && join('',@strings) !~ m{\|}) { diff --git lib/Mail/SpamAssassin/Plugin/AWL.pm lib/Mail/SpamAssassin/Plugin/AWL.pm index 902dd242a..5d187efb0 100644 --- lib/Mail/SpamAssassin/Plugin/AWL.pm +++ lib/Mail/SpamAssassin/Plugin/AWL.pm @@ -60,7 +60,7 @@ package Mail::SpamAssassin::Plugin::AWL; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; use Mail::SpamAssassin::Plugin; use Mail::SpamAssassin::AutoWhitelist; diff --git lib/Mail/SpamAssassin/Plugin/AccessDB.pm lib/Mail/SpamAssassin/Plugin/AccessDB.pm index 7fd9f284b..0dad95953 100644 --- lib/Mail/SpamAssassin/Plugin/AccessDB.pm +++ lib/Mail/SpamAssassin/Plugin/AccessDB.pm @@ -57,7 +57,7 @@ use Mail::SpamAssassin::Logger; use Fcntl; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; use vars qw(@ISA); diff --git lib/Mail/SpamAssassin/Plugin/AntiVirus.pm lib/Mail/SpamAssassin/Plugin/AntiVirus.pm index 8605d7f89..0fa35b7e6 100644 --- lib/Mail/SpamAssassin/Plugin/AntiVirus.pm +++ lib/Mail/SpamAssassin/Plugin/AntiVirus.pm @@ -49,7 +49,7 @@ use Mail::SpamAssassin::Plugin; use Mail::SpamAssassin::Util; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; use vars qw(@ISA); diff --git lib/Mail/SpamAssassin/Plugin/AskDNS.pm lib/Mail/SpamAssassin/Plugin/AskDNS.pm index 3511162e8..ac533a779 100644 --- lib/Mail/SpamAssassin/Plugin/AskDNS.pm +++ lib/Mail/SpamAssassin/Plugin/AskDNS.pm @@ -140,7 +140,7 @@ IPv4 address. In case of a TXT or SPF resource record which can return multiple character-strings (as defined in Section 3.3 of [RFC1035]), these strings are concatenated with no delimiters before comparing the result to the filtering string. 
This follows requirements of several documents, -such as RFC 5518, RFC 4408, RFC 4871, RFC 5617. Examples of a plain text +such as RFC 5518, RFC 7208, RFC 4871, RFC 5617. Examples of a plain text filtering parameter: "127.0.0.1", "transaction", 'list' . A regular expression follows a familiar perl syntax like /.../ or m{...} @@ -189,7 +189,7 @@ use warnings; use re 'taint'; use Mail::SpamAssassin::Plugin; -use Mail::SpamAssassin::Util qw(decode_dns_question_entry); +use Mail::SpamAssassin::Util qw(decode_dns_question_entry idn_to_ascii); use Mail::SpamAssassin::Logger; use vars qw(@ISA %rcode_value $txtdata_can_provide_a_list); @@ -320,8 +320,9 @@ sub set_config { my @answer_types = split(/,/, $query_type); # http://www.iana.org/assignments/dns-parameters/dns-parameters.xml if (grep(!/^(?:ANY|A|AAAA|MX|TXT|PTR|NAPTR|NS|SOA|CERT|CNAME|DNAME| - DHCID|HINFO|MINFO|RP|HIP|IPSECKEY|KX|LOC|SRV| - SSHFP|SPF)\z/x, @answer_types)) { + DHCID|HINFO|MINFO|RP|HIP|IPSECKEY|KX|LOC|GPOS|SRV| + OPENPGPKEY|SSHFP|SPF|TLSA|URI|CAA|CSYNC)\z/x, + @answer_types)) { return $Mail::SpamAssassin::Conf::INVALID_VALUE; } $query_type = 'ANY' if @answer_types > 1 || $answer_types[0] eq 'ANY'; @@ -465,6 +466,7 @@ OUTER: $query_domain =~ s{_([A-Z][A-Z0-9]*)_} { defined $current_tag_val{$1} ? $current_tag_val{$1} : '' }ge; + $query_domain = idn_to_ascii($query_domain); # the $dnskey identifies this query in AsyncLoop's pending_lookups my $dnskey = join(':', 'askdns', $query_type, $query_domain); @@ -539,7 +541,7 @@ sub process_response_packet { @answer = ( undef ); } - # NOTE: $rr->rdatastr returns the result encoded in a DNS zone file + # NOTE: $rr->rdstring returns the result encoded in a DNS zone file # format, i.e. enclosed in double quotes if a result contains whitespace # (or other funny characters), and may use \DDD encoding or \X quoting as # per RFC 1035. 
Using $rr->txtdata instead avoids this unnecessary encoding @@ -557,7 +559,7 @@ sub process_response_packet { # the code handling such reply from DNS MUST assemble all of these # marshaled text blocks into a single one before any syntactical # verification takes place. - # The same goes for RFC 4408 (SPF), RFC 4871 (DKIM), RFC 5617 (ADSP), + # The same goes for RFC 7208 (SPF), RFC 4871 (DKIM), RFC 5617 (ADSP), # draft-kucherawy-dmarc-base (DMARC), ... for my $rr (@answer) { @@ -566,19 +568,37 @@ sub process_response_packet { # special case, no answer records, only rcode can be tested } else { $rr_type = uc $rr->type; - if ($rr->UNIVERSAL::can('txtdata')) { # TXT, SPF - # join with no intervening spaces, as per RFC 5518 + if ($rr_type eq 'A') { + # Net::DNS::RR::A::address() is available since Net::DNS 0.69 + $rr_rdatastr = $rr->UNIVERSAL::can('address') ? $rr->address + : $rr->rdatastr; + if ($rr_rdatastr =~ m/^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\z/) { + $rdatanum = Mail::SpamAssassin::Util::my_inet_aton($rr_rdatastr); + } + + } elsif ($rr->UNIVERSAL::can('txtdata')) { + # TXT, SPF: join with no intervening spaces, as per RFC 5518 if ($txtdata_can_provide_a_list || $rr_type ne 'TXT') { $rr_rdatastr = join('', $rr->txtdata); # txtdata() in list context! } else { # char_str_list() is only available for TXT records $rr_rdatastr = join('', $rr->char_str_list); # historical } + # Net::DNS attempts to decode text strings in a TXT record as UTF-8, + # which is undesired: octets failing the UTF-8 decoding are converted + # to a Unicode "replacement character" U+FFFD (encoded as octets + # \x{EF}\x{BF}\x{BD} in UTF-8), and ASCII text is unnecessarily + # flagged as perl native characters (utf8 flag on), which can be + # disruptive on later processing, e.g. implicitly upgrading strings + # on concatenation. Unfortunately there is no way of legally bypassing + # the UTF-8 decoding by Net::DNS::RR::TXT in Net::DNS::RR::Text. 
+ # Try to minimize damage by encoding back to UTF-8 octets: + utf8::encode($rr_rdatastr) if utf8::is_utf8($rr_rdatastr); + } else { - $rr_rdatastr = $rr->rdatastr; - if ($rr_type eq 'A' && - $rr_rdatastr =~ m/^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\z/) { - $rdatanum = Mail::SpamAssassin::Util::my_inet_aton($rr_rdatastr); - } + # rdatastr() is historical, use rdstring() since Net::DNS 0.69 + $rr_rdatastr = $rr->UNIVERSAL::can('rdstring') ? $rr->rdstring + : $rr->rdatastr; + utf8::encode($rr_rdatastr) if utf8::is_utf8($rr_rdatastr); } # dbg("askdns: received rr type %s, data: %s", $rr_type, $rr_rdatastr); } diff --git lib/Mail/SpamAssassin/Plugin/AutoLearnThreshold.pm lib/Mail/SpamAssassin/Plugin/AutoLearnThreshold.pm index af258eb77..34bb87cf6 100644 --- lib/Mail/SpamAssassin/Plugin/AutoLearnThreshold.pm +++ lib/Mail/SpamAssassin/Plugin/AutoLearnThreshold.pm @@ -55,7 +55,7 @@ use Mail::SpamAssassin::Plugin; use Mail::SpamAssassin::Logger; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; use vars qw(@ISA); diff --git lib/Mail/SpamAssassin/Plugin/Bayes.pm lib/Mail/SpamAssassin/Plugin/Bayes.pm index 1877fc0b2..d053928fc 100644 --- lib/Mail/SpamAssassin/Plugin/Bayes.pm +++ lib/Mail/SpamAssassin/Plugin/Bayes.pm @@ -45,7 +45,7 @@ package Mail::SpamAssassin::Plugin::Bayes; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; BEGIN { @@ -200,7 +200,9 @@ use constant ADD_INVIZ_TOKENS_NO_PREFIX => 0; 'X-Authentication-Warning' => '*a', 'Organization' => '*o', 'Organisation' => '*o', - 'Content-Type' => '*c', + 'Content-Type' => '*ct', + 'Content-Disposition' => '*cd', + 'Content-Transfer-Encoding' => '*ce', 'x-spam-relays-trusted' => '*RT', 'x-spam-relays-untrusted' => '*RU', ); @@ -1120,7 +1122,7 @@ sub tokenize { # generate an SHA1 hash and take the lower 40 bits as our token my %tokens; foreach my $token (@tokens) { - # skip empty tokens + # dbg("bayes: token: %s", $token); $tokens{substr(sha1($token), -5)} = $token if $token ne ''; } @@ 
-1217,7 +1219,7 @@ sub _tokenize_line { my(@t) = $token =~ /( (?: [\xE0-\xEF] | [\xF0-\xF4][\x80-\xBF] ) [\x80-\xBF]{2} )/xsg; if (@t) { - push (@rettokens, map('u8:'.$_, @t)); + push (@rettokens, map($tokprefix.'u8:'.$_, @t)); next; } } @@ -1227,7 +1229,7 @@ sub _tokenize_line { # but I'm doing tuples to keep the dbs small(er)." Sounds like a plan # to me! (jm) while ($token =~ s/^(..?)//) { - push (@rettokens, "8:$1"); + push (@rettokens, $tokprefix.'8:'.$1); } next; } diff --git lib/Mail/SpamAssassin/Plugin/BodyEval.pm lib/Mail/SpamAssassin/Plugin/BodyEval.pm index 92c1892ac..b2910f686 100644 --- lib/Mail/SpamAssassin/Plugin/BodyEval.pm +++ lib/Mail/SpamAssassin/Plugin/BodyEval.pm @@ -23,7 +23,7 @@ use Mail::SpamAssassin::Constants qw(:sa); use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; use vars qw(@ISA); diff --git lib/Mail/SpamAssassin/Plugin/BodyRuleBaseExtractor.pm lib/Mail/SpamAssassin/Plugin/BodyRuleBaseExtractor.pm index 568e4eee2..31c65d600 100644 --- lib/Mail/SpamAssassin/Plugin/BodyRuleBaseExtractor.pm +++ lib/Mail/SpamAssassin/Plugin/BodyRuleBaseExtractor.pm @@ -37,7 +37,7 @@ use Data::Dumper; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; use vars qw(@ISA); diff --git lib/Mail/SpamAssassin/Plugin/DCC.pm lib/Mail/SpamAssassin/Plugin/DCC.pm index 10b693e31..5c97d1771 100644 --- lib/Mail/SpamAssassin/Plugin/DCC.pm +++ lib/Mail/SpamAssassin/Plugin/DCC.pm @@ -77,7 +77,7 @@ package Mail::SpamAssassin::Plugin::DCC; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; use Mail::SpamAssassin::Plugin; diff --git lib/Mail/SpamAssassin/Plugin/DKIM.pm lib/Mail/SpamAssassin/Plugin/DKIM.pm index cced59efc..0166e2a7c 100644 --- lib/Mail/SpamAssassin/Plugin/DKIM.pm +++ lib/Mail/SpamAssassin/Plugin/DKIM.pm @@ -99,6 +99,9 @@ header fields, other plugins, etc.: _DKIMDOMAIN_ Signing Domain Identifier (SDID) (the 'd' tag) from valid signatures; + _DKIMSELECTOR_ + DKIM selector (the 's' tag) from valid signatures; + 
Identities and domains from signatures which failed verification are not included in these tags. Duplicates are eliminated (e.g. when there are two or more valid signatures from the same signer, only one copy makes it into a tag). @@ -122,10 +125,11 @@ package Mail::SpamAssassin::Plugin::DKIM; use Mail::SpamAssassin::Plugin; use Mail::SpamAssassin::Logger; use Mail::SpamAssassin::Timeout; +use Mail::SpamAssassin::Util qw(idn_to_ascii); use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; use vars qw(@ISA); @@ -178,13 +182,18 @@ sub set_config { Works similarly to whitelist_from, except that in addition to matching an author address (From) to the pattern in the first parameter, the message -must also carry a Domain Keys Identified Mail (DKIM) signature made by a -signing domain (SDID, i.e. the d= tag) that is acceptable to us. +must also carry a valid Domain Keys Identified Mail (DKIM) signature made by +a signing domain (SDID, i.e. the d= tag) that is acceptable to us. Only one whitelist entry is allowed per line, as in C<whitelist_from>. Multiple C<whitelist_from_dkim> lines are allowed. File-glob style characters are allowed for the From address (the first parameter), just like with -C<whitelist_from>. The second parameter does not accept wildcards. +C<whitelist_from>. + +The second parameter (the signing-domain) does not accept full file-glob style +wildcards, although a simple '*.' (or just a '.') prefix to a domain name +is recognized and implies any subdomain of the specified domain (but not +the domain itself). 
If no signing-domain parameter is specified, the only acceptable signature will be an Author Domain Signature (sometimes called first-party signature) @@ -205,7 +214,8 @@ Examples of whitelisting based on third-party signatures: whitelist_from_dkim jane@example.net example.org whitelist_from_dkim rick@info.example.net example.net whitelist_from_dkim *@info.example.net example.net - whitelist_from_dkim *@* remailer.example.com + whitelist_from_dkim *@* mail7.remailer.example.com + whitelist_from_dkim *@* *.remailer.example.com =item def_whitelist_from_dkim author@example.com [signing-domain] @@ -376,7 +386,8 @@ some valid signature on a message has no reputational value (without being associated with a particular domain), regardless of its key size - anyone can prepend its own signature on a copy of some third party mail and re-send it, which makes it no more trustworthy than without such signature. This is also -a reason for a rule DKIM_VALID to have a near-zero score. +a reason for a rule DKIM_VALID to have a near-zero score, i.e. a rule hit +is only informational. =cut @@ -786,7 +797,8 @@ sub _check_dkim_signature { # Only do so if EDNS0 provides a reasonably-sized UDP payload size, # as our interface does not provide a DNS fallback to TCP, unlike # the Net::DNS::Resolver::send which does provide it. - my $res = $self->{main}->{resolver}->get_resolver; + my $res = $self->{main}->{resolver}; + dbg("dkim: providing our own resolver: %s", ref $res); Mail::DKIM::DNS::resolver($res); } } @@ -892,13 +904,13 @@ sub _check_dkim_signature { } } if (would_log("dbg","dkim")) { - dbg("dkim: %s %s, i=%s, d=%s, s=%s, a=%s, c=%s, %s, %s", + dbg("dkim: %s %s, i=%s, d=%s, s=%s, a=%s, c=%s, %s, %s, %s", $info, $signature->isa('Mail::DKIM::DkSignature') ? 'DK' : 'DKIM', map(!defined $_ ? '(undef)' : $_, $signature->identity, $d, $signature->selector, $signature->algorithm, scalar($signature->canonicalization), - $key_size ? "key_bits=$key_size" : (), + $key_size ? 
"key_bits=$key_size" : "unknown key size", ($sig_result_supported ? $signature : $verifier)->result ), defined $d && $pms->{dkim_author_domains}->{$d} ? 'matches author domain' @@ -915,15 +927,19 @@ sub _check_dkim_signature { dbg("dkim: signature verification result: %s", uc($sig_res)); # supply values for both tags - my(%seen1, %seen2, @identity_list, @domain_list); + my(%seen1, %seen2, %seen3, @identity_list, @domain_list, @selector_list); @identity_list = grep(defined $_ && $_ ne '' && !$seen1{$_}++, map($_->identity, @valid_signatures)); @domain_list = grep(defined $_ && $_ ne '' && !$seen2{$_}++, map($_->domain, @valid_signatures)); + @selector_list = grep(defined $_ && $_ ne '' && !$seen3{$_}++, + map($_->selector, @valid_signatures)); $pms->set_tag('DKIMIDENTITY', @identity_list == 1 ? $identity_list[0] : \@identity_list); $pms->set_tag('DKIMDOMAIN', @domain_list == 1 ? $domain_list[0] : \@domain_list); + $pms->set_tag('DKIMSELECTOR', + @selector_list == 1 ? $selector_list[0] : \@selector_list); } elsif (@signatures) { $pms->{dkim_signed} = 1; my $sig = $signatures[0]; @@ -1040,12 +1056,13 @@ sub _check_dkim_adsp { my $err = $timer->run_and_catch(sub { eval { if (Mail::DKIM::AuthorDomainPolicy->UNIVERSAL::can("fetch")) { + my $author_domain_ace = idn_to_ascii($author_domain); dbg("dkim: adsp: performing lookup on _adsp._domainkey.%s", - $author_domain); + $author_domain_ace); # get our Net::DNS::Resolver object my $res = $self->{main}->{resolver}->get_resolver; $practices = Mail::DKIM::AuthorDomainPolicy->fetch( - Protocol => "dns", Domain => $author_domain, + Protocol => "dns", Domain => $author_domain_ace, DnsResolver => $res); } 1; @@ -1257,8 +1274,12 @@ sub _wlcheck_list { # identity (AUID). Nevertheless, be prepared to accept the full e-mail # address there for compatibility, and just ignore its local-part. 
- $acceptable_sdid = $1 if $acceptable_sdid =~ /\@([^\@]*)\z/; - $matches = 1 if $sdid eq lc $acceptable_sdid; + $acceptable_sdid = $1 if $acceptable_sdid =~ /\@([^\@]*)\z/s; + if ($acceptable_sdid =~ s/^\*?\.//s) { + $matches = 1 if $sdid =~ /\.\Q$acceptable_sdid\E\z/si; + } else { + $matches = 1 if $sdid eq lc $acceptable_sdid; + } } if ($matches) { if (would_log("dbg","dkim")) { diff --git lib/Mail/SpamAssassin/Plugin/DNSEval.pm lib/Mail/SpamAssassin/Plugin/DNSEval.pm index aa3195288..5581a3206 100644 --- lib/Mail/SpamAssassin/Plugin/DNSEval.pm +++ lib/Mail/SpamAssassin/Plugin/DNSEval.pm @@ -31,7 +31,7 @@ use Mail::SpamAssassin::Util qw(reverse_ip_address); use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; use vars qw(@ISA); diff --git lib/Mail/SpamAssassin/Plugin/FreeMail.pm lib/Mail/SpamAssassin/Plugin/FreeMail.pm index 60e1ec583..b7df82685 100644 --- lib/Mail/SpamAssassin/Plugin/FreeMail.pm +++ lib/Mail/SpamAssassin/Plugin/FreeMail.pm @@ -342,20 +342,21 @@ sub _parse_body { my $body = $pms->get_decoded_stripped_body_text_array(); BODY: foreach (@$body) { # strip urls with possible emails inside - s#, not mailto: # also strip ones followed by quote-like "wrote:" (but not fax: and tel: etc) - s#{email_regex}(?:>|\s{1,10}(?!(?:fa(?:x|csi)|tel|phone|e?-?mail))[a-z]{2,11}:)# #gi; + s{{email_regex}(?:>|\s{1,10}(?!(?:fa(?:x|csi)|tel|phone|e?-?mail))[a-z]{2,11}:)}{ }gi; while (/$self->{email_regex}/g) { my $email = lc($1); - push(@body_emails, $email) unless defined $seen{$email}; + utf8::encode($email) if utf8::is_utf8($email); # chars to UTF-8 + push(@body_emails, $email) unless $seen{$email}; $seen{$email} = 1; - last BODY if scalar @body_emails >= 40; # sanity + last BODY if @body_emails >= 40; # sanity } } my $count_all = 0; my $count_fm = 0; - foreach my $email (@body_emails) { + foreach my $email (@body_emails) { # as UTF-8 octets if (++$count_all == $pms->{main}->{conf}->{freemail_max_body_emails}) { if 
($pms->{main}->{conf}->{freemail_skip_when_over_max}) { $pms->{freemail_skip_body} = 1; diff --git lib/Mail/SpamAssassin/Plugin/HTMLEval.pm lib/Mail/SpamAssassin/Plugin/HTMLEval.pm index cf5ae7346..879741b3d 100644 --- lib/Mail/SpamAssassin/Plugin/HTMLEval.pm +++ lib/Mail/SpamAssassin/Plugin/HTMLEval.pm @@ -19,7 +19,7 @@ package Mail::SpamAssassin::Plugin::HTMLEval; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; use Mail::SpamAssassin::Plugin; diff --git lib/Mail/SpamAssassin/Plugin/HTTPSMismatch.pm lib/Mail/SpamAssassin/Plugin/HTTPSMismatch.pm index fceef3fbf..70e0ca248 100644 --- lib/Mail/SpamAssassin/Plugin/HTTPSMismatch.pm +++ lib/Mail/SpamAssassin/Plugin/HTTPSMismatch.pm @@ -21,7 +21,7 @@ use Mail::SpamAssassin::Plugin; use Mail::SpamAssassin::Logger; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; use vars qw(@ISA); diff --git lib/Mail/SpamAssassin/Plugin/Hashcash.pm lib/Mail/SpamAssassin/Plugin/Hashcash.pm index 566b7b998..cb6146ace 100644 --- lib/Mail/SpamAssassin/Plugin/Hashcash.pm +++ lib/Mail/SpamAssassin/Plugin/Hashcash.pm @@ -85,7 +85,7 @@ package Mail::SpamAssassin::Plugin::Hashcash; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; use Mail::SpamAssassin::Plugin; diff --git lib/Mail/SpamAssassin/Plugin/HeaderEval.pm lib/Mail/SpamAssassin/Plugin/HeaderEval.pm index e39756774..beceb8843 100644 --- lib/Mail/SpamAssassin/Plugin/HeaderEval.pm +++ lib/Mail/SpamAssassin/Plugin/HeaderEval.pm @@ -19,12 +19,14 @@ package Mail::SpamAssassin::Plugin::HeaderEval; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; use Errno qw(EBADF); use Mail::SpamAssassin::Plugin; use Mail::SpamAssassin::Locales; +use Mail::SpamAssassin::Util qw(get_my_locales parse_rfc822_date + idn_to_ascii is_valid_utf_8); use Mail::SpamAssassin::Logger; use Mail::SpamAssassin::Constants qw(:sa :ip); @@ -116,7 +118,7 @@ sub check_for_faraway_charset_in_headers { my ($self, $pms) = @_; my $hdr; - my @locales = 
Mail::SpamAssassin::Util::get_my_locales($self->{main}->{conf}->{ok_locales}); + my @locales = get_my_locales($self->{main}->{conf}->{ok_locales}); return 0 if grep { $_ eq "all" } @locales; @@ -276,6 +278,17 @@ sub check_illegal_chars { $str =~ s/^(?:Subject|From):.*$//gmi; } + if ($str =~ tr/\x00-\x7F//c && is_valid_utf_8($str)) { + # is non-ASCII and is valid UTF-8 + if ($str =~ tr/\x00-\x08\x0B\x0C\x0E-\x1F//) { + dbg("eval: %s is valid UTF-8 but contains controls: %s", $header, $str); + } else { + # todo: only with a SMTPUTF8 mail + dbg("eval: %s is valid UTF-8: %s", $header, $str); + return 0; + } + } + # count illegal substrings (RFC 2045) # (non-ASCII + C0 controls except TAB, NL, CR) my $illegal = $str =~ tr/\x00-\x08\x0b\x0c\x0e-\x1f\x7f-\xff//; @@ -722,7 +735,7 @@ sub _get_date_header_time { for my $date (@dates) { if (defined($date) && length($date)) { chomp($date); - $time = Mail::SpamAssassin::Util::parse_rfc822_date($date); + $time = parse_rfc822_date($date); } last DATE if defined($time); } @@ -773,7 +786,7 @@ sub _get_received_header_times { if ($rcvd =~ m/(\s.?\d+ \S\S\S \d+ \d+:\d+:\d+ \S+)/) { my $date = $1; dbg2("eval: trying Received fetchmail header date for real time: $date"); - my $time = Mail::SpamAssassin::Util::parse_rfc822_date($date); + my $time = parse_rfc822_date($date); if (defined($time) && (time() >= $time)) { dbg2("eval: time_t from date=$time, rcvd=$date"); push @fetchmail_times, $time; @@ -793,7 +806,7 @@ sub _get_received_header_times { if ($rcvd =~ m/(\s.?\d+ \S\S\S \d+ \d+:\d+:\d+ \S+)/) { my $date = $1; dbg2("eval: trying Received header date for real time: $date"); - my $time = Mail::SpamAssassin::Util::parse_rfc822_date($date); + my $time = parse_rfc822_date($date); if (defined($time)) { dbg2("eval: time_t from date=$time, rcvd=$date"); push @header_times, $time; @@ -953,14 +966,14 @@ sub check_outlook_message_id { my $fudge = 250; $_ = $pms->get('Date'); - $_ = Mail::SpamAssassin::Util::parse_rfc822_date($_) || 0; + $_ = 
parse_rfc822_date($_) || 0; my $expected = int (($_ * $x) + $y); my $diff = $timetoken - $expected; return 0 if (abs($diff) < $fudge); $_ = $pms->get('Received'); /(\s.?\d+ \S\S\S \d+ \d+:\d+:\d+ \S+).*?$/; - $_ = Mail::SpamAssassin::Util::parse_rfc822_date($_) || 0; + $_ = parse_rfc822_date($_) || 0; $expected = int(($_ * $x) + $y); $diff = $timetoken - $expected; @@ -1035,7 +1048,7 @@ sub check_ratware_envelope_from { return 0 if $from eq '' || $to eq ''; return 0 if $from =~ /^SRS\d=/; - if ($to =~ /^([^@]+)@(.+)$/) { + if ($to =~ /^([^@]+)\@(.+)$/) { my($user,$dom) = ($1,$2); $dom = $self->{main}->{registryboundaries}->trim_domain($dom); return unless diff --git lib/Mail/SpamAssassin/Plugin/ImageInfo.pm lib/Mail/SpamAssassin/Plugin/ImageInfo.pm index 90014b878..747382417 100644 --- lib/Mail/SpamAssassin/Plugin/ImageInfo.pm +++ lib/Mail/SpamAssassin/Plugin/ImageInfo.pm @@ -84,7 +84,7 @@ use Mail::SpamAssassin::Plugin; use Mail::SpamAssassin::Logger; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; use vars qw(@ISA); diff --git lib/Mail/SpamAssassin/Plugin/MIMEEval.pm lib/Mail/SpamAssassin/Plugin/MIMEEval.pm index 19261da95..b7255003b 100644 --- lib/Mail/SpamAssassin/Plugin/MIMEEval.pm +++ lib/Mail/SpamAssassin/Plugin/MIMEEval.pm @@ -15,11 +15,38 @@ # limitations under the License. 
# +=head1 NAME + +MIMEEval - perform various tests against MIME structure and body + +=head1 SYNOPSIS + + loadplugin Mail::SpamAssassin::Plugin::MIMEEval + + body NAME_OF_RULE eval:check_for_mime + body NAME_OF_RULE eval:check_for_mime_html + body NAME_OF_RULE eval:check_for_mime_html_only + body NAME_OF_RULE eval:check_mime_multipart_ratio + body NAME_OF_RULE eval:check_msg_parse_flags + body NAME_OF_RULE eval:check_for_ascii_text_illegal + body NAME_OF_RULE eval:check_abundant_unicode_ratio + body NAME_OF_RULE eval:check_for_faraway_charset + body NAME_OF_RULE eval:check_for_uppercase + body NAME_OF_RULE eval:check_ma_non_text + body NAME_OF_RULE eval:check_base64_length + body NAME_OF_RULE eval:check_qp_ratio + +=head1 DESCRIPTION + +Perform various tests against MIME structure and body. + +=cut + package Mail::SpamAssassin::Plugin::MIMEEval; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; use Mail::SpamAssassin::Plugin; @@ -69,6 +96,7 @@ sub are_more_high_bits_set { ($numlos <= $numhis && $numhis > 3); } + =over 4 =item has_check_for_ascii_text_illegal @@ -474,6 +502,7 @@ sub _check_attachments { Adds capability check for "if can()" for check_qp_ratio =cut + sub has_check_qp_ratio { 1 } =item check_qp_ratio @@ -484,6 +513,7 @@ quoted printable to total bytes in an email. 
=back =cut + sub check_qp_ratio { my ($self, $pms, undef, $min) = @_; diff --git lib/Mail/SpamAssassin/Plugin/MIMEHeader.pm lib/Mail/SpamAssassin/Plugin/MIMEHeader.pm index 322f07b8b..b117f502c 100644 --- lib/Mail/SpamAssassin/Plugin/MIMEHeader.pm +++ lib/Mail/SpamAssassin/Plugin/MIMEHeader.pm @@ -59,7 +59,7 @@ package Mail::SpamAssassin::Plugin::MIMEHeader; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; use Mail::SpamAssassin::Plugin; diff --git lib/Mail/SpamAssassin/Plugin/NetCache.pm lib/Mail/SpamAssassin/Plugin/NetCache.pm index c17d8629c..448a3f787 100644 --- lib/Mail/SpamAssassin/Plugin/NetCache.pm +++ lib/Mail/SpamAssassin/Plugin/NetCache.pm @@ -47,7 +47,7 @@ use Mail::SpamAssassin::Util; use Mail::SpamAssassin::Logger; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; use vars qw(@ISA); diff --git lib/Mail/SpamAssassin/Plugin/P595Body.pm lib/Mail/SpamAssassin/Plugin/P595Body.pm index dc722200e..3bab1e376 100644 --- lib/Mail/SpamAssassin/Plugin/P595Body.pm +++ lib/Mail/SpamAssassin/Plugin/P595Body.pm @@ -23,7 +23,7 @@ use Mail::SpamAssassin::Plugin::OneLineBodyRuleType; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; use vars qw(@ISA); diff --git lib/Mail/SpamAssassin/Plugin/PDFInfo.pm lib/Mail/SpamAssassin/Plugin/PDFInfo.pm index 2242aecc8..fa8d60b8e 100644 --- lib/Mail/SpamAssassin/Plugin/PDFInfo.pm +++ lib/Mail/SpamAssassin/Plugin/PDFInfo.pm @@ -146,7 +146,7 @@ use Mail::SpamAssassin::Plugin; use Mail::SpamAssassin::Logger; use strict; use warnings; -use bytes; +# use bytes; use Digest::MD5 qw(md5_hex); use MIME::QuotedPrint; diff --git lib/Mail/SpamAssassin/Plugin/Pyzor.pm lib/Mail/SpamAssassin/Plugin/Pyzor.pm index 18023b43d..ba9383b6b 100644 --- lib/Mail/SpamAssassin/Plugin/Pyzor.pm +++ lib/Mail/SpamAssassin/Plugin/Pyzor.pm @@ -41,7 +41,7 @@ use Mail::SpamAssassin::Util qw(untaint_var untaint_file_path proc_status_ok exit_status_str); use strict; use warnings; -use bytes; +# use bytes; use re 
'taint'; use vars qw(@ISA); diff --git lib/Mail/SpamAssassin/Plugin/RabinKarpBody.pm lib/Mail/SpamAssassin/Plugin/RabinKarpBody.pm index 6ab9b6c0e..ce14906f0 100644 --- lib/Mail/SpamAssassin/Plugin/RabinKarpBody.pm +++ lib/Mail/SpamAssassin/Plugin/RabinKarpBody.pm @@ -24,7 +24,7 @@ use Mail::SpamAssassin::Plugin::OneLineBodyRuleType; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; use vars qw(@ISA); diff --git lib/Mail/SpamAssassin/Plugin/Razor2.pm lib/Mail/SpamAssassin/Plugin/Razor2.pm index e24252ce0..d4acad11e 100644 --- lib/Mail/SpamAssassin/Plugin/Razor2.pm +++ lib/Mail/SpamAssassin/Plugin/Razor2.pm @@ -45,7 +45,7 @@ use Mail::SpamAssassin::Logger; use Mail::SpamAssassin::Timeout; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; use vars qw(@ISA); @@ -368,11 +368,11 @@ sub plugin_revoke { if ($self->razor2_access($options->{text}, 'revoke', undef)) { $options->{revoke}->{revoke_available} = 1; - dbg('reporter: spam revoked from Razor'); + info('reporter: spam revoked from Razor'); $options->{revoke}->{revoke_return} = 1; } else { - dbg('reporter: could not revoke spam from Razor'); + info('reporter: could not revoke spam from Razor'); } } diff --git lib/Mail/SpamAssassin/Plugin/RelayCountry.pm lib/Mail/SpamAssassin/Plugin/RelayCountry.pm index 2e172f3f7..9fce3d763 100644 --- lib/Mail/SpamAssassin/Plugin/RelayCountry.pm +++ lib/Mail/SpamAssassin/Plugin/RelayCountry.pm @@ -44,7 +44,7 @@ use Mail::SpamAssassin::Logger; use Mail::SpamAssassin::Constants qw(:ip); use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; use vars qw(@ISA); diff --git lib/Mail/SpamAssassin/Plugin/RelayEval.pm lib/Mail/SpamAssassin/Plugin/RelayEval.pm index 9408c4191..164b114bc 100644 --- lib/Mail/SpamAssassin/Plugin/RelayEval.pm +++ lib/Mail/SpamAssassin/Plugin/RelayEval.pm @@ -23,7 +23,7 @@ use Mail::SpamAssassin::Constants qw(:ip); use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; use vars qw(@ISA); diff --git 
lib/Mail/SpamAssassin/Plugin/ReplaceTags.pm lib/Mail/SpamAssassin/Plugin/ReplaceTags.pm index ee187e51d..057554738 100644 --- lib/Mail/SpamAssassin/Plugin/ReplaceTags.pm +++ lib/Mail/SpamAssassin/Plugin/ReplaceTags.pm @@ -55,7 +55,7 @@ use Mail::SpamAssassin::Logger; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; use vars qw(@ISA); diff --git lib/Mail/SpamAssassin/Plugin/Reuse.pm lib/Mail/SpamAssassin/Plugin/Reuse.pm index 1539f6eae..c8c6acb12 100644 --- lib/Mail/SpamAssassin/Plugin/Reuse.pm +++ lib/Mail/SpamAssassin/Plugin/Reuse.pm @@ -22,7 +22,7 @@ mass-check output. package Mail::SpamAssassin::Plugin::Reuse; -use bytes; +# use bytes; use strict; use warnings; diff --git lib/Mail/SpamAssassin/Plugin/Rule2XSBody.pm lib/Mail/SpamAssassin/Plugin/Rule2XSBody.pm index 58d95ef68..b6f09918b 100644 --- lib/Mail/SpamAssassin/Plugin/Rule2XSBody.pm +++ lib/Mail/SpamAssassin/Plugin/Rule2XSBody.pm @@ -41,7 +41,7 @@ use Mail::SpamAssassin::Plugin::OneLineBodyRuleType; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; use vars qw(@ISA); diff --git lib/Mail/SpamAssassin/Plugin/SPF.pm lib/Mail/SpamAssassin/Plugin/SPF.pm index 7f066a0ac..9ef772ae2 100644 --- lib/Mail/SpamAssassin/Plugin/SPF.pm +++ lib/Mail/SpamAssassin/Plugin/SPF.pm @@ -38,7 +38,7 @@ use Mail::SpamAssassin::Logger; use Mail::SpamAssassin::Timeout; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; use vars qw(@ISA); @@ -232,7 +232,7 @@ working downwards until results are successfully parsed. 
=item has_check_for_spf_errors Adds capability check for "if can()" for check_for_spf_permerror, check_for_spf_temperror, check_for_spf_helo_permerror and check_for_spf_helo_permerror - + =cut sub has_check_for_spf_errors { 1 } @@ -506,9 +506,9 @@ sub _check_spf { $self->{spf_server} = Mail::SPF::Server->new( hostname => $scanner->get_tag('HOSTNAME'), dns_resolver => $self->{main}->{resolver}, - max_dns_interactive_terms => 15); + max_dns_interactive_terms => 20); # Bug 7112: max_dns_interactive_terms defaults to 10, but even 14 is - # not enough for ebay.com, setting it to 15 + # not enough for ebay.com; it was set to 15, then raised to 20 per bug 7182 1; } or do { $eval_stat = $@ ne '' ? $@ : "errno=$!"; chomp $eval_stat; @@ -727,7 +727,7 @@ sub _check_spf { elsif ($result eq 'temperror') { $scanner->{spf_helo_temperror} = 1; } elsif ($result eq 'error') { $scanner->{spf_helo_temperror} = 1; } - if ($result eq 'fail') { # RFC 4408 6.2 + if ($result eq 'fail') { # RFC 7208 6.2 $scanner->{spf_helo_failure_comment} = "SPF failed: $comment"; } } else { @@ -740,7 +740,7 @@ sub _check_spf { elsif ($result eq 'temperror') { $scanner->{spf_temperror} = 1; } elsif ($result eq 'error') { $scanner->{spf_temperror} = 1; } - if ($result eq 'fail') { # RCF 4408 6.2 + if ($result eq 'fail') { # RFC 7208 6.2 $scanner->{spf_failure_comment} = "SPF failed: $comment"; } } diff --git lib/Mail/SpamAssassin/Plugin/Shortcircuit.pm lib/Mail/SpamAssassin/Plugin/Shortcircuit.pm index 005892f12..6083dadfc 100644 --- lib/Mail/SpamAssassin/Plugin/Shortcircuit.pm +++ lib/Mail/SpamAssassin/Plugin/Shortcircuit.pm @@ -45,7 +45,7 @@ use Mail::SpamAssassin::Plugin; use Mail::SpamAssassin::Logger; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; use vars qw(@ISA); diff --git lib/Mail/SpamAssassin/Plugin/SpamCop.pm lib/Mail/SpamAssassin/Plugin/SpamCop.pm index 978d8c22d..052e0cd19 100644 --- lib/Mail/SpamAssassin/Plugin/SpamCop.pm +++ lib/Mail/SpamAssassin/Plugin/SpamCop.pm @@ 
-46,7 +46,7 @@ use Mail::SpamAssassin::Logger; use IO::Socket; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; use constant HAS_NET_DNS => eval { require Net::DNS; }; diff --git lib/Mail/SpamAssassin/Plugin/Test.pm lib/Mail/SpamAssassin/Plugin/Test.pm index 1523afb13..0c6f80c89 100644 --- lib/Mail/SpamAssassin/Plugin/Test.pm +++ lib/Mail/SpamAssassin/Plugin/Test.pm @@ -36,7 +36,7 @@ package Mail::SpamAssassin::Plugin::Test; use Mail::SpamAssassin::Plugin; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; use vars qw(@ISA); diff --git lib/Mail/SpamAssassin/Plugin/TextCat.pm lib/Mail/SpamAssassin/Plugin/TextCat.pm index c62ed7c6a..bc080a3ec 100644 --- lib/Mail/SpamAssassin/Plugin/TextCat.pm +++ lib/Mail/SpamAssassin/Plugin/TextCat.pm @@ -40,6 +40,9 @@ L for details. Note: the language cannot always be recognized with sufficient confidence. In that case, no action is taken. +You can use the _TEXTCATRESULTS_ tag to view the internal ngram-scoring; it +might help when fine-tuning settings. + =cut package Mail::SpamAssassin::Plugin::TextCat; @@ -48,7 +51,7 @@ use Mail::SpamAssassin::Plugin; use Mail::SpamAssassin::Logger; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; use vars qw(@ISA); @@ -297,7 +300,14 @@ Rhaeto-Romance, Sanskrit, Scots, Slovenian, and Yiddish. =item textcat_max_languages N (default: 3) The maximum number of languages any one message can simultaneously match -before its classification is considered unknown. +before its classification is considered unknown. You can try reducing this +to 2 or possibly even 1 for more confident results, as it's unusual for a +message to contain multiple languages. + +Read the description of textcat_acceptable_score also, as these settings are +closely related. Scoring affects how many languages might be matched and +here we set the "false positive limit" where we think the engine can't +decide which languages the message really contains. 
=cut @@ -337,7 +347,20 @@ models (note that each of those models is used completely). =item textcat_acceptable_score N (default: 1.02) Include any language that scores at least C<textcat_acceptable_score> in the -returned list of languages. +returned list of languages. + +This setting is basically a percentile range. Any language having internal +ngram-score within N-percent of the best score is included into results. +Values larger than 1.05 are not recommended, as they can generate many false +matches. A setting of 1.00 would mean a single best scoring language is +always forcibly selected, but this is not recommended as then +textcat_max_languages can't do its job classifying language as uncertain. + +Read the description for textcat_max_languages, as these settings are +closely related. + +You can use the _TEXTCATRESULTS_ tag to view the internal ngram-scoring; it +might help when fine-tuning settings. =cut @@ -394,8 +417,9 @@ sub load_models { } sub classify { - my ($inputptr, $conf, %skip) = @_; + my ($inputptr, $opts, %skip) = @_; my %results; + my $conf = $opts->{conf}; my $maxp = $conf->{textcat_max_ngrams}; # create ngrams for input @@ -422,6 +446,14 @@ sub classify { my $best = $results{$results[0]}; + # Insert first 20 results in tag for debugging purposes + my @results_tag; + foreach (@results[0..19]) { + last unless defined $_; + push @results_tag, sprintf "%s:%s(%.02f)", $_, $results{$_}, $results{$_} / $best; + } + $opts->{permsgstatus}->set_tag('TEXTCATRESULTS', join(' ', @results_tag)); + my @answers = (shift(@results)); while (@results && $results{$results[0]} < ($conf->{textcat_acceptable_score} * $best)) { @answers = (@answers, shift(@results)); @@ -441,14 +473,22 @@ sub create_lm { my %ngram; my @sorted; + # Note that $$inputptr may or may not be in perl characters (utf8 flag set) + my $is_unicode = utf8::is_utf8($$inputptr); + # my $non_word_characters = qr/[0-9\s]/; - for my $word (split(/[0-9\s]+/, ${$_[0]})) + for my $word (split(/[0-9\s]+/, $$inputptr)) { - # Bug 6229: 
Current TextCat database only works well with - # lowercase input, lets work around it until it's properly - # generated and/or locale issues are resolved.. - $word =~ tr/A-Z\xc0-\xd6\xd8-\xde/a-z\xe0-\xf6\xf8-\xfe/ - if $word =~ /[A-Z]/ && $word =~ /[a-zA-Z\xc0-\xd6\xd8-\xde\xe0-\xf6\xf8-\xfe]{4}/; + # Bug 6229: Current TextCat database only works well with lowercase input + if ($is_unicode) { + # Unicode rules are used for the case change + $word = lc $word if $word =~ /\w{4}/; + utf8::encode($word); # encode Unicode characters to UTF-8 octets + } elsif ($word =~ /[A-Z]/ && + $word =~ /[a-zA-Z\xc0-\xd6\xd8-\xde\xe0-\xf6\xf8-\xfe]{4}/) { + # assume ISO 8859-1 / Windows-1252 + $word =~ tr/A-Z\xc0-\xd6\xd8-\xde/a-z\xe0-\xf6\xf8-\xfe/; + } $word = "\000" . $word . "\000"; my $len = length($word); my $flen = $len; @@ -506,7 +546,7 @@ sub extract_metadata { $skip{$_} = 1 for split(' ', $opts->{conf}->{inactive_languages}); delete $skip{$_} for split(' ', $opts->{conf}->{ok_languages}); dbg("textcat: classifying, skipping: " . join(" ", keys %skip)); - @matches = classify(\$body, $opts->{conf}, %skip); + @matches = classify(\$body, $opts, %skip); } else { dbg("textcat: message too short for language analysis"); diff --git lib/Mail/SpamAssassin/Plugin/TxRep.pm lib/Mail/SpamAssassin/Plugin/TxRep.pm index 5dc0cf29d..0d751c7cf 100644 --- lib/Mail/SpamAssassin/Plugin/TxRep.pm +++ lib/Mail/SpamAssassin/Plugin/TxRep.pm @@ -24,17 +24,21 @@ Mail::SpamAssassin::Plugin::TxRep - Normalize scores with sender reputation reco =head1 SYNOPSIS The TxRep (Reputation) plugin is designed as an improved replacement of the AWL -(Auto-Whitelist) plugin. It adjusts the final message spam score by looking up and -taking in consideration the reputation of the sender. +(Auto-Whitelist) plugin. It adjusts the final message spam score by looking up +and taking in consideration the reputation of the sender. 
-To try TxRep out, you B disable the AWL plugin (if present), back up its -database and add a line loading this module in init.pre (AWL may be enabled in v310.pre): +To try TxRep out, you B first disable the AWL plugin (if enabled), and +back up its database. AWL is loaded in v310.pre and can be disabled by +commenting out the loadplugin line: # loadplugin Mail::SpamAssassin::Plugin::AWL - loadplugin Mail::SpamAssassin::Plugin::TxRep When AWL is not disabled, TxRep will refuse to run. +TxRep should be enabled by uncommenting the following line in v341.pre: + + loadplugin Mail::SpamAssassin::Plugin::TxRep + Use the supplied 60_txreputation.cf file or add these lines to a .cf file: header TXREP eval:check_senders_reputation() @@ -113,7 +117,7 @@ description of the factor below. through SpamAssassin's API, AWL adjusts the historical total score of the plain email address without IP (and deleted records bound to an IP), but since during the reception new records with IP will be added, the blacklisted entry would cease acting during -scanning. TxRep always uses the record of th plain email address without IP together +scanning. TxRep always uses the record of the plain email address without IP together with the one bound to an IP address, DKIM signature, or SPF pass (unless the weight factor for the EMAIL reputation is set to zero). AWL uses the score of 100 (resp. -100) for the blacklisting (resp. whitelisting) purposes. TxRep increases the value @@ -197,7 +201,7 @@ package Mail::SpamAssassin::Plugin::TxRep; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; use NetAddr::IP 4.000; # qw(:upper); @@ -648,11 +652,16 @@ Used by the SQLBasedAddrList storage implementation. If this option is set the SQLBasedAddrList module will keep separate database entries for DKIM-validated e-mail addresses and for non-validated -ones. 
A pre-requisite when setting this option is that a field awl.signedby -exists in a SQL table, otherwise SQL operations will fail (which is why we -need this option at all - for compatibility with pre-3.3.0 database schema). -A plugin DKIM should also be enabled, as otherwise there is no benefit from -turning on this option. +ones. Without this option, or for domains that do not use a DKIM signature, +the reputation of legitimate email can get mixed with the reputation of +forgeries. A pre-requisite when setting this option is that a field +txrep.signedby exists in a SQL table, otherwise SQL operations will fail. +A DKIM plugin must also be enabled in order for this option to take effect. +This option is highly recommended. Unless you are using a pre-3.3.0 database +schema and cannot upgrade, there is no reason to disable this option. If +you are upgrading from AWL and using a pre-3.3.0 schema, the txrep.signedby +column will not exist. It is recommended that you add this column, but if +that is not possible you must set this option to 0 to avoid SQL errors. =cut # ................................................................... push (@cmds, { @@ -1218,6 +1227,7 @@ sub check_senders_reputation { my $autolearn = defined $self->{autolearn}; $self->{last_pms} = $self->{autolearn} = undef; + # Cases where we would not be able to use TxRep return 0 unless ($self->{conf}->{use_txrep}); if ($self->{conf}->{use_auto_whitelist}) { warn("TxRep: cannot run when Auto-Whitelist is enabled. 
Please disable it!\n"); @@ -1246,13 +1256,17 @@ sub check_senders_reputation { my $domain = $from; $domain =~ s/^.+@//; + # Find the last untrusted relay and populate helo and original IP my ($origip, $helo); if (defined $pms->{relays_trusted} || defined $pms->{relays_untrusted}) { my $trusteds = @{$pms->{relays_trusted}}; foreach my $rly ( @{$pms->{relays_trusted}}, @{$pms->{relays_untrusted}} ) { # Get the last found HELO, regardless of private/public or trusted/untrusted # Avoiding a redundant duplicate entry if HELO is equal/similar to another identificator - if (defined $rly->{helo} && $rly->{helo} !~ /^\[?$rly->{ip}\]?$/ && $rly->{helo} !~ /$domain/i && $rly->{helo} !~ /$from/i ) { + if (defined $rly->{helo} && + $rly->{helo} !~ /^\[?\Q$rly->{ip}\E\]?$/ && + $rly->{helo} !~ /^\Q$domain\E$/i && + $rly->{helo} !~ /^\Q$from\E$/i ) { $helo = $rly->{helo}; } # use only trusted ID, but use the first untrusted IP (if available) (AWL bug 6908) @@ -1264,6 +1278,7 @@ sub check_senders_reputation { } } + # Look for previous scores of the same message, for instance when doing re-learning if ($self->{conf}->{txrep_track_messages}) { if ($msg_id) { my $msg_rep = $self->check_reputations($pms, 'MSG_ID', $msg_id, undef, $date, undef); @@ -1304,7 +1319,10 @@ sub check_senders_reputation { } } + # Get the signing domain my $signedby = ($self->{conf}->{auto_whitelist_distinguish_signed})? 
$pms->get_tag('DKIMDOMAIN') : undef; + + # Summary of all information we've gathered so far dbg("TxRep: active, %s pre-score: %s, autolearn score: %s, IP: %s, address: %s %s", $msg_id || '', $pms->{score} || '?', @@ -1326,28 +1344,41 @@ sub check_senders_reputation { my $totalweight = 0; $self->{totalweight} = $totalweight; - $delta += $self->check_reputations($pms, 'EMAIL_IP', $from, $ip, $signedby, $msgscore); - if ($domain) {$delta += $self->check_reputations($pms, 'DOMAIN', $domain, $ip, $signedby, $msgscore);} - if ($helo) {$delta += $self->check_reputations($pms, 'HELO', $helo, undef, 'HELO', $msgscore);} + # Get current reputation info + $delta += $self->check_reputations($pms, 'EMAIL_IP', $from, $ip, $signedby, $msgscore); + + if ($domain) { + $delta += $self->check_reputations($pms, 'DOMAIN', $domain, $ip, $signedby, $msgscore); + } + if ($helo) { + $delta += $self->check_reputations($pms, 'HELO', $helo, undef, 'HELO', $msgscore); + } if ($origip) { - if (!$signedby) {$delta += $self->check_reputations($pms, 'EMAIL', $from, undef, undef, $msgscore);} - $delta += $self->check_reputations($pms, 'IP', $origip, undef, undef, $msgscore); + if (!$signedby) { + $delta += $self->check_reputations($pms, 'EMAIL', $from, undef, undef, $msgscore); + } + $delta += $self->check_reputations($pms, 'IP', $origip, undef, undef, $msgscore); } + # Learn against this message and store reputation if (!defined $self->{learning}) { $delta = ($self->{totalweight})? 
$self->{conf}->{txrep_factor} * $delta / $self->{totalweight} : 0; if ($delta) { - $pms->got_hit("TXREP", "TXREP: ", ruletype => 'eval', score => sprintf("%0.3f", $delta)); + $pms->got_hit("TXREP", "TXREP: ", ruletype => 'eval', score => sprintf("%0.3f", $delta)); } $msgscore += $delta; if (defined $pms->{score}) { - dbg("TxRep: post-TxRep score: %.3f", $pms->{score}); + dbg("TxRep: post-TxRep score: %.3f", $pms->{score}); } } + # Track message ID if ($self->{conf}->{txrep_track_messages} && $msg_id) { $self->check_reputations($pms, 'MSG_ID', $msg_id, undef, $date, $msgscore); } - if (!defined $self->{txKeepStoreTied}) {$self->finish();} + # Close any open resources + if (!defined $self->{txKeepStoreTied}) { + $self->finish(); + } return 0; } @@ -1361,14 +1392,16 @@ sub check_reputations { if ($self->open_storages()) { if ($self->{conf}->{txrep_user2global_ratio} && $self->{user_storage} != $self->{global_storage}) { - my $user = $self->check_reputation('user_storage', @_); - my $global = $self->check_reputation('global_storage',@_); + my $user = $self->check_reputation('user_storage', @_); + my $global = $self->check_reputation('global_storage',@_); - $delta = (defined $user && $user==$user) ? - ( $self->{conf}->{txrep_user2global_ratio} * $user + $global ) / ( 1 + $self->{conf}->{txrep_user2global_ratio} ) : - $global; + if (defined $user and $user == $user) { + $delta = ( $self->{conf}->{txrep_user2global_ratio} * $user + $global ) / ( 1 + $self->{conf}->{txrep_user2global_ratio} ); + } else { + $delta = $global; + } } else { - $delta = $self->check_reputation(undef,@_); + $delta = $self->check_reputation(undef,@_); } } return $delta; @@ -1383,6 +1416,29 @@ sub check_reputation { my $delta = 0; my $weight = ($key eq 'MSG_ID')? 
1 : eval('$pms->{main}->{conf}->{txrep_weight_'.lc($key).'}'); +# { +# #Bug 7164, trying to find out reason for these: _WARN: Use of uninitialized value $msgscore in addition (+) at /usr/share/perl5/vendor_perl/Mail/SpamAssassin/Plugin/TxRep.pm line 1415. +# no warnings; +# +# unless (defined $msgscore) { +# #Output some params and the calling function so we can identify more about this bug +# dbg("TxRep: MsgScore Undefined (bug 7164) - check_reputation Parameters: self: $self storage: $storage pms: $pms, key: $key, id: $id, ip: $ip, signedby: $signedby, msgscore: $msgscore"); +# dbg("TxRep: MsgScore Undefined (bug 7164) - weight: $weight"); +# +# my ($package, $filename, $line) = caller(); +# +# chomp($package); +# chomp($filename); +# chomp($line); +# +# dbg("TxRep: MsgScore Undefined (bug 7164) - Caller Info: Package: $package - Filename: $filename - Line: $line"); +# +# #Define $msgscore as a triage to hide warnings while we find the root cause +# #$msgscore = 0; +# } +# } + + if (defined $weight && $weight) { my $meanrep; my $timer = $self->{main}->time_method('check_txrep_'.lc($key)); @@ -1410,11 +1466,16 @@ sub check_reputation { $self->{totalweight} += $weight; if ($key eq 'MSG_ID' && $self->count() > 0) { $delta = $self->total() / $self->count(); - $pms->set_tag('TXREP'.$tag_id, sprintf("%2.1f",$delta)); + $pms->set_tag('TXREP'.$tag_id, sprintf("%2.1f", $delta)); } elsif (defined $self->total()) { - $delta = ($self->total() + $msgscore) / (1 + $self->count()) - $msgscore; + #Bug 7164 - $msgscore undefined + if (defined $msgscore) { + $delta = ($self->total() + $msgscore) / (1 + $self->count()) - $msgscore; + } else { + $delta = ($self->total()) / (1 + $self->count()); + } - $pms->set_tag('TXREP_'.$tag_id, sprintf("%2.1f",$delta)); + $pms->set_tag('TXREP_'.$tag_id, sprintf("%2.1f", $delta)); if (defined $meanrep) { $pms->set_tag('TXREP_'.$tag_id.'_MEAN', sprintf("%2.1f", $meanrep)); } @@ -1448,7 +1509,9 @@ sub check_reputation { 
$self->{checker}->remove_entry($self->{entry}); #forgetting the message ID } } - if (defined $storage) {$self->{checker} = $self->{default_storage};} + if (defined $storage) { + $self->{checker} = $self->{default_storage}; + } return ($weight || 0) * ($delta || 0); } @@ -1566,7 +1629,8 @@ sub open_storages { ########################################################################### my $self = shift; - return 1 unless (!defined $self->{default_storage}); + # disabled per bug 7191 + #return 1 unless (!defined $self->{default_storage}); my $factory; if ($self->{main}->{pers_addr_list_factory}) { @@ -1594,7 +1658,7 @@ sub open_storages { # TODO: add an a method to the handler class instead my ($storage_type, $is_global); - if (ref($factory) =~ /SQLasedAddrList/) { + if (ref($factory) =~ /SQLBasedAddrList/) { $is_global = defined $self->{conf}->{user_awl_sql_override_username}; $storage_type = 'SQL'; if ($is_global && $self->{conf}->{user_awl_sql_override_username} eq $self->{main}->{username}) { diff --git lib/Mail/SpamAssassin/Plugin/URIDNSBL.pm lib/Mail/SpamAssassin/Plugin/URIDNSBL.pm index 9602eba4e..2e854957a 100644 --- lib/Mail/SpamAssassin/Plugin/URIDNSBL.pm +++ lib/Mail/SpamAssassin/Plugin/URIDNSBL.pm @@ -294,11 +294,11 @@ package Mail::SpamAssassin::Plugin::URIDNSBL; use Mail::SpamAssassin::Plugin; use Mail::SpamAssassin::Constants qw(:ip); -use Mail::SpamAssassin::Util; +use Mail::SpamAssassin::Util qw(idn_to_ascii); use Mail::SpamAssassin::Logger; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; use vars qw(@ISA); @@ -901,6 +901,7 @@ sub query_hosts_or_domains { sub lookup_domain_ns { my ($self, $pms, $obj, $dom, $rulename) = @_; + $dom = idn_to_ascii($dom); my $key = "NS:" . 
$dom; my $ent = { key => $key, zone => $dom, obj => $obj, type => "URI-NS", @@ -942,9 +943,8 @@ sub complete_ns_lookup { next unless (defined($str) && defined($dom)); dbg("uridnsbl: got($j) NS for $dom: $str"); - if ($str =~ /IN\s+NS\s+(\S+)/) { - my $nsmatch = lc $1; - $nsmatch =~ s/\.$//; + if ($rr->type eq 'NS') { + my $nsmatch = lc $rr->nsdname; # available since at least Net::DNS 0.14 my $nsrhblstr = $nsmatch; my $fullnsrhblstr = $nsmatch; @@ -987,6 +987,7 @@ sub complete_ns_lookup { sub lookup_a_record { my ($self, $pms, $obj, $hname, $rulename) = @_; + $hname = idn_to_ascii($hname); my $key = "A:" . $hname; my $ent = { key => $key, zone => $hname, obj => $obj, type => "URI-A", @@ -1010,25 +1011,19 @@ sub complete_a_lookup { dbg("uridnsbl: complete_a_lookup aborted %s", $ent->{key}); return; } - dbg("uridnsbl: complete_a_lookup %s", $ent->{key}); - my @answer = $pkt->answer; + $hname = '' if !defined $hname; my $j = 0; + my @answer = $pkt->answer; foreach my $rr (@answer) { $j++; - my $str = $rr->string; - if (!defined $hname) { - warn "complete_a_lookup-1: $j, (hname is undef), $str"; - } elsif (!defined $str) { - warn "complete_a_lookup-2: $j, $hname, (str is undef)"; - next; - } - dbg("uridnsbl: complete_a_lookup got(%d) A for %s: %s", $j,$hname,$str); - - local $1; - if ($str =~ /IN\s+A\s+(\S+)/) { - $self->lookup_dnsbl_for_ip($pms, $ent->{obj}, $1); - } + next if $rr->type ne 'A'; + # Net::DNS::RR::A::address() is available since Net::DNS 0.69 + my $ip_address = $rr->UNIVERSAL::can('address') ? 
$rr->address + : $rr->rdatastr; + dbg("uridnsbl: complete_a_lookup got(%d) A for %s: %s", + $j, $hname, $ip_address); + $self->lookup_dnsbl_for_ip($pms, $ent->{obj}, $ip_address); } } @@ -1038,7 +1033,8 @@ sub lookup_dnsbl_for_ip { my ($self, $pms, $obj, $ip) = @_; local($1,$2,$3,$4); - $ip =~ /^(\d+)\.(\d+)\.(\d+)\.(\d+)$/; + $ip =~ /^(\d+)\.(\d+)\.(\d+)\.(\d+)$/ + or warn "lookup_dnsbl_for_ip: not an IPv4 address: $ip\n"; my $revip = "$4.$3.$2.$1"; my $conf = $pms->{conf}; @@ -1060,6 +1056,9 @@ sub lookup_dnsbl_for_ip { sub lookup_single_dnsbl { my ($self, $pms, $obj, $rulename, $lookupstr, $dnsbl, $qtype) = @_; + $lookupstr = idn_to_ascii($lookupstr); + $dnsbl = idn_to_ascii($dnsbl); + my $key = "DNSBL:" . $lookupstr . ':' . $dnsbl; my $ent = { key => $key, zone => $dnsbl, obj => $obj, type => 'URI-DNSBL', @@ -1100,15 +1099,18 @@ sub complete_dnsbl_lookup { my $rr_type = $rr->type; if ($rr_type eq 'A') { - $rdatastr = $rr->rdatastr; + # Net::DNS::RR::A::address() is available since Net::DNS 0.69 + $rdatastr = $rr->UNIVERSAL::can('address') ? 
$rr->address + : $rr->rdatastr; if ($rdatastr =~ m/^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/) { $rdatanum = Mail::SpamAssassin::Util::my_inet_aton($rdatastr); } } elsif ($rr_type eq 'TXT') { - # txtdata returns a non- zone-file-format encoded result, unlike rdatastr; + # txtdata returns a non- zone-file-format encoded result, unlike rdstring; # avoid space-separated RDATA fields if possible; # txtdata provides a list of strings in list context since Net::DNS 0.69 $rdatastr = join('',$rr->txtdata); + utf8::encode($rdatastr) if utf8::is_utf8($rdatastr); } else { next; } diff --git lib/Mail/SpamAssassin/Plugin/URIDetail.pm lib/Mail/SpamAssassin/Plugin/URIDetail.pm index 22769dc89..9f694792b 100644 --- lib/Mail/SpamAssassin/Plugin/URIDetail.pm +++ lib/Mail/SpamAssassin/Plugin/URIDetail.pm @@ -72,7 +72,7 @@ use Mail::SpamAssassin::Util qw(untaint_var); use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; use vars qw(@ISA); diff --git lib/Mail/SpamAssassin/Plugin/URIEval.pm lib/Mail/SpamAssassin/Plugin/URIEval.pm index a94400fdc..f9f27967f 100644 --- lib/Mail/SpamAssassin/Plugin/URIEval.pm +++ lib/Mail/SpamAssassin/Plugin/URIEval.pm @@ -22,7 +22,7 @@ use Mail::SpamAssassin::Logger; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; use vars qw(@ISA); diff --git lib/Mail/SpamAssassin/Plugin/URILocalBL.pm lib/Mail/SpamAssassin/Plugin/URILocalBL.pm index e190fabce..216a216ec 100644 --- lib/Mail/SpamAssassin/Plugin/URILocalBL.pm +++ lib/Mail/SpamAssassin/Plugin/URILocalBL.pm @@ -108,7 +108,7 @@ use Socket; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; use version; @@ -350,7 +350,7 @@ sub check_uri_local_bl { # look for W3 links only next unless (defined $info->{types}->{a}); - while (my($host, $domain) = each $info->{hosts}) { + while (my($host, $domain) = each %{$info->{hosts}}) { # skip if the domain name was matched if (exists $rule->{exclusions} && exists $rule->{exclusions}->{$domain}) { @@ -374,7 +374,7 @@ sub 
check_uri_local_bl { } if (exists $rule->{countries}) { - dbg("check: uri_local_bl countries %s\n", join(' ', sort keys $rule->{countries})); + dbg("check: uri_local_bl countries %s\n", join(' ', sort keys %{$rule->{countries}})); my $cc = $self->{geoip}->country_code_by_addr($ip); @@ -403,7 +403,7 @@ sub check_uri_local_bl { } if (exists $rule->{isps}) { - dbg("check: uri_local_bl isps %s\n", join(' ', map { '"' . $_ . '"'; } sort keys $rule->{isps})); + dbg("check: uri_local_bl isps %s\n", join(' ', map { '"' . $_ . '"'; } sort keys %{$rule->{isps}})); my $isp = $self->{geoisp}->isp_by_name($ip); diff --git lib/Mail/SpamAssassin/Plugin/VBounce.pm lib/Mail/SpamAssassin/Plugin/VBounce.pm index 28370fc71..564517614 100644 --- lib/Mail/SpamAssassin/Plugin/VBounce.pm +++ lib/Mail/SpamAssassin/Plugin/VBounce.pm @@ -162,6 +162,7 @@ sub _relay_is_in_whitelist_bounce_relays { sub _relay_is_in_list { my ($self, $list, $pms, $relay) = @_; $relay = lc $relay; + utf8::encode($relay) if utf8::is_utf8($relay); # encode chars to UTF-8 if (defined $list->{$relay}) { return 1; } diff --git lib/Mail/SpamAssassin/Plugin/WLBLEval.pm lib/Mail/SpamAssassin/Plugin/WLBLEval.pm index 159256014..139d1436b 100644 --- lib/Mail/SpamAssassin/Plugin/WLBLEval.pm +++ lib/Mail/SpamAssassin/Plugin/WLBLEval.pm @@ -17,14 +17,16 @@ package Mail::SpamAssassin::Plugin::WLBLEval; -use Mail::SpamAssassin::Plugin; -use Mail::SpamAssassin::Logger; - use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; +use NetAddr::IP 4.000; + +use Mail::SpamAssassin::Plugin; +use Mail::SpamAssassin::Logger; + use vars qw(@ISA); @ISA = qw(Mail::SpamAssassin::Plugin); @@ -310,23 +312,51 @@ sub _check_whitelist_rcvd { foreach my $white_addr (keys %{$list}) { my $regexp = qr/$list->{$white_addr}{re}/i; foreach my $domain (@{$list->{$white_addr}{domain}}) { + # $domain is a second param in whitelist_from_rcvd: a domain name or an IP address if ($addr =~ $regexp) { + # From or sender address matching the first 
param in whitelist_from_rcvd my $match; foreach my $lastunt (@relays) { - local $1; - if ($domain =~ m{^ \[ (.*) \] \z}sx) { # matching by IP address + local($1,$2); + if ($domain =~ m{^ \[ (.*) \] ( / \d{1,3} )? \z}sx) { + # matching by IP address my($wl_ip, $rly_ip) = ($1, $lastunt->{ip}); + $wl_ip .= $2 if defined $2; # allow prefix len even after bracket + if (!defined $rly_ip || $rly_ip eq '') { # relay's IP address not provided or unparseable - } elsif ($wl_ip =~ /^\d+\.\d+\.\d+\.\d+\z/) { + + } elsif ($wl_ip =~ /^\d+\.\d+\.\d+\.\d+\z/s) { + # an IPv4 whitelist entry can only be matched by an IPv4 relay if ($wl_ip eq $rly_ip) { $match = 1; last } # exact match - } elsif ($wl_ip =~ /^[\d\.]+\z/) { # assume IPv4 classful subnet + + } elsif ($wl_ip =~ /^[\d\.]+\z/s) { # an IPv4 classful subnet? $wl_ip =~ s/\.*\z/./; # enforce trailing dot - if ($rly_ip =~ /^\Q$wl_ip\E/i) { $match = 1; last } # subnet + if ($rly_ip =~ /^\Q$wl_ip\E/) { $match = 1; last } # subnet + + } else { # either an wl entry is an IPv6 addr, or has a prefix len + my $rly_ip_obj = NetAddr::IP->new($rly_ip); # TCP-info field + if (!defined $rly_ip_obj) { + dbg("rules: bad IP address in relay: %s, sender: %s", + $rly_ip, $addr); + } else { + my $wl_ip_obj = NetAddr::IP->new($wl_ip); # whitelist 2nd param + if (!defined $wl_ip_obj) { + info("rules: bad IP address in whitelist: %s", $wl_ip); + } elsif ($wl_ip_obj->contains($rly_ip_obj)) { + # note: an IPv4-compatible IPv6 address can match an IPv4 addr + dbg("rules: relay addr %s matches whitelist %s, sender: %s", + $rly_ip, $wl_ip_obj, $addr); + $match = 1; last; + } else { + dbg("rules: relay addr %s does not match wl %s, sender %s", + $rly_ip, $wl_ip_obj, $addr); + } + } } - # todo: handle IPv6 and CIDR notation - } else { # match by a rdns name + + } else { # match by an rdns name my $rdns = $lastunt->{lc_rdns}; if ($rdns =~ /(?:^|\.)\Q${domain}\E$/i) { $match=1; last } } diff --git lib/Mail/SpamAssassin/Plugin/WhiteListSubject.pm 
lib/Mail/SpamAssassin/Plugin/WhiteListSubject.pm index 8033b5999..21d9801f0 100644 --- lib/Mail/SpamAssassin/Plugin/WhiteListSubject.pm +++ lib/Mail/SpamAssassin/Plugin/WhiteListSubject.pm @@ -46,7 +46,7 @@ package Mail::SpamAssassin::Plugin::WhiteListSubject; use Mail::SpamAssassin::Plugin; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; use vars qw(@ISA); diff --git lib/Mail/SpamAssassin/PluginHandler.pm lib/Mail/SpamAssassin/PluginHandler.pm index 94bd2a646..756304dfd 100644 --- lib/Mail/SpamAssassin/PluginHandler.pm +++ lib/Mail/SpamAssassin/PluginHandler.pm @@ -30,7 +30,7 @@ use Mail::SpamAssassin::Logger; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; use File::Spec; diff --git lib/Mail/SpamAssassin/RegistryBoundaries.pm lib/Mail/SpamAssassin/RegistryBoundaries.pm index dde6a34b7..d1fab9398 100644 --- lib/Mail/SpamAssassin/RegistryBoundaries.pm +++ lib/Mail/SpamAssassin/RegistryBoundaries.pm @@ -27,12 +27,15 @@ package Mail::SpamAssassin::RegistryBoundaries; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; our @ISA = qw(); use vars qw(%US_STATES); +use Mail::SpamAssassin::Logger; +use Mail::SpamAssassin::Util qw(idn_to_ascii); + # called from SpamAssassin->init() to create $self->{util_rb} sub new { my $class = shift; @@ -46,7 +49,8 @@ sub new { bless ($self, $class); # Initialize valid_tlds_re for schemeless uri parsing, FreeMail etc - if ($self->{conf}->{valid_tlds}) { + if ($self->{conf}->{valid_tlds} && %{$self->{conf}->{valid_tlds}}) { + # International domain names are already in ASCII-compatible encoding (ACE) my $tlds = join('|', keys %{$self->{conf}->{valid_tlds}}); # Perl 5.10+ trie optimizes lists, no need for fancy regex optimizing $self->{valid_tlds_re} = qr/(?:$tlds)/i; @@ -87,9 +91,9 @@ Examples: =cut sub split_domain { - my $self = shift; - my $domain = lc shift; + my ($self, $domain) = @_; + $domain = idn_to_ascii($domain); my $hostname = ''; if (defined $domain && $domain ne '') { 
@@ -126,12 +130,14 @@ sub split_domain { } else { my $temp = join(".", @domparts); + # International domain names in ASCII-compatible encoding (ACE) last if ($self->{conf}->{three_level_domains}{$temp}); } } elsif (@domparts == 2) { # co.uk, etc. my $temp = join(".", @domparts); + # International domain names in ASCII-compatible encoding (ACE) last if ($self->{conf}->{two_level_domains}{$temp}); } push(@hostname, shift @domparts); @@ -185,12 +191,13 @@ uses a valid TLD or ccTLD. =cut sub is_domain_valid { - my $self = shift; - my $dom = lc shift; + my ($self, $dom) = @_; # domains don't have whitespace return 0 if ($dom =~ /\s/); + $dom = idn_to_ascii($dom); + # ensure it ends in a known-valid TLD, and has at least 1 dot return 0 unless ($dom =~ /\.([^.]+)$/); return 0 unless ($self->{conf}->{valid_tlds}{$1}); diff --git lib/Mail/SpamAssassin/Reporter.pm lib/Mail/SpamAssassin/Reporter.pm index 9955d0a46..c4b70291a 100644 --- lib/Mail/SpamAssassin/Reporter.pm +++ lib/Mail/SpamAssassin/Reporter.pm @@ -21,7 +21,7 @@ package Mail::SpamAssassin::Reporter; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; use Mail::SpamAssassin::Logger; diff --git lib/Mail/SpamAssassin/SQLBasedAddrList.pm lib/Mail/SpamAssassin/SQLBasedAddrList.pm index c0a5c24ac..2641d1d02 100644 --- lib/Mail/SpamAssassin/SQLBasedAddrList.pm +++ lib/Mail/SpamAssassin/SQLBasedAddrList.pm @@ -75,7 +75,7 @@ package Mail::SpamAssassin::SQLBasedAddrList; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; # Do this silliness to stop RPM from finding DBI as required @@ -220,6 +220,7 @@ sub get_addr_entry { } push(@args, @signedby); } + $sql .= " ORDER BY last_hit"; my $sth = $self->{dbh}->prepare($sql); my $rc = $sth->execute($self->{_username}, @args); @@ -236,8 +237,8 @@ sub get_addr_entry { # an author domain and by a remailer)? 
for now just take an average while ( defined($aryref = $sth->fetchrow_arrayref()) ) { if (defined $entry->{count} && defined $aryref->[1]) { - $entry->{count} += $aryref->[0]; - $entry->{totscore} += $aryref->[1]; + $entry->{count} = $aryref->[0]; + $entry->{totscore} = $aryref->[1]; } $entry->{exists_p} = 1; $cnt++; diff --git lib/Mail/SpamAssassin/SpamdForkScaling.pm lib/Mail/SpamAssassin/SpamdForkScaling.pm index b6b95f0bc..a69ac59e0 100644 --- lib/Mail/SpamAssassin/SpamdForkScaling.pm +++ lib/Mail/SpamAssassin/SpamdForkScaling.pm @@ -21,7 +21,7 @@ package Mail::SpamAssassin::SpamdForkScaling; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; use Errno qw(); diff --git lib/Mail/SpamAssassin/SubProcBackChannel.pm lib/Mail/SpamAssassin/SubProcBackChannel.pm index 826e6f89d..04885972e 100644 --- lib/Mail/SpamAssassin/SubProcBackChannel.pm +++ lib/Mail/SpamAssassin/SubProcBackChannel.pm @@ -21,7 +21,7 @@ package Mail::SpamAssassin::SubProcBackChannel; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; use IO::Socket; diff --git lib/Mail/SpamAssassin/Timeout.pm lib/Mail/SpamAssassin/Timeout.pm index 80dd51353..cc4760932 100644 --- lib/Mail/SpamAssassin/Timeout.pm +++ lib/Mail/SpamAssassin/Timeout.pm @@ -55,7 +55,7 @@ package Mail::SpamAssassin::Timeout; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; use Time::HiRes qw(time); diff --git lib/Mail/SpamAssassin/Util.pm lib/Mail/SpamAssassin/Util.pm index f85241830..fac1f8a31 100644 --- lib/Mail/SpamAssassin/Util.pm +++ lib/Mail/SpamAssassin/Util.pm @@ -42,7 +42,7 @@ package Mail::SpamAssassin::Util; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; require 5.008001; # needs utf8::is_utf8() @@ -59,10 +59,13 @@ BEGIN { @ISA = qw(Exporter); @EXPORT = (); - @EXPORT_OK = qw(&local_tz &base64_decode &untaint_var &untaint_file_path + @EXPORT_OK = qw(&local_tz &base64_decode &base64_encode + &untaint_var &untaint_file_path &exit_status_str &proc_status_ok 
&am_running_on_windows &reverse_ip_address &decode_dns_question_entry - &secure_tmpfile &secure_tmpdir &uri_list_canonicalize); + &secure_tmpfile &secure_tmpdir &uri_list_canonicalize + &get_my_locales &parse_rfc822_date &idn_to_ascii + &is_valid_utf_8); } use Mail::SpamAssassin; @@ -74,6 +77,7 @@ use File::Basename; use Time::Local; use Sys::Hostname (); # don't import hostname() into this namespace! use NetAddr::IP 4.000; +use Scalar::Util qw(tainted); use Fcntl; use Errno qw(ENOENT EACCES EEXIST); use POSIX qw(:sys_wait_h WIFEXITED WIFSIGNALED WIFSTOPPED WEXITSTATUS @@ -84,18 +88,55 @@ use POSIX qw(:sys_wait_h WIFEXITED WIFSIGNALED WIFSTOPPED WEXITSTATUS use constant HAS_MIME_BASE64 => eval { require MIME::Base64; }; use constant RUNNING_ON_WINDOWS => ($^O =~ /^(?:mswin|dos|os2)/oi); -# These are not implemented on windows (see bug 6798 and 6470) +# These are only defined as stubs on Windows (see bugs 6798 and 6470). BEGIN { if (RUNNING_ON_WINDOWS) { + no warnings 'redefine'; + + # See the section on $? at + # http://perldoc.perl.org/perlvar.html#Error-Variables for some + # hints on the magic numbers that are used here. *WIFEXITED = sub { not $_[0] & 127 }; *WEXITSTATUS = sub { $_[0] >> 8 }; - *WIFSIGNALED = sub { ($_[0] & 127) && ($_[0] & 127 != 127) }; + *WIFSIGNALED = sub { ($_[0] & 127) && (($_[0] & 127) != 127) }; *WTERMSIG = sub { $_[0] & 127 }; } } ########################################################################### +our $ALT_FULLSTOP_UTF8_RE; +BEGIN { + # Bug 6751: + # RFC 3490 (IDNA): Whenever dots are used as label separators, the + # following characters MUST be recognized as dots: U+002E (full stop), + # U+3002 (ideographic full stop), U+FF0E (fullwidth full stop), + # U+FF61 (halfwidth ideographic full stop). + # RFC 5895: [...] the IDEOGRAPHIC FULL STOP character (U+3002) + # can be mapped to the FULL STOP before label separation occurs. + # [...] 
Only the IDEOGRAPHIC FULL STOP character (U+3002) is added in + # this mapping because the authors have not fully investigated [...] + # Adding also 'SMALL FULL STOP' (U+FE52) as seen in the wild, + # and a 'ONE DOT LEADER' (U+2024). + # + no bytes; # make sure there is no 'use bytes' in effect + my $dot_chars = "\x{2024}\x{3002}\x{FF0E}\x{FF61}\x{FE52}"; # \x{002E} + my $dot_bytes = join('|', split(//,$dot_chars)); utf8::encode($dot_bytes); + $ALT_FULLSTOP_UTF8_RE = qr/$dot_bytes/so; +} + +########################################################################### + +our $have_libidn; +BEGIN { + eval { require Net::LibIDN } and do { $have_libidn = 1 }; +} + +$have_libidn or warn "INFO: module Net::LibIDN not available,\n". + " internationalized domain names with U-labels will not be recognized!\n"; + +########################################################################### + # find an executable in the current $PATH (or whatever for that platform) { # Show the PATH we're going to explore only once. @@ -237,10 +278,11 @@ sub untaint_file_path { return '' if ($path eq ''); local ($1); - # Barry Jaspan: allow ~ and spaces, good for Windows. Also return '' - # if input is '', as it is a safe path. - my $chars = '-_A-Za-z0-9\xA0-\xFF\.\%\@\=\+\,\/\\\:'; - my $re = qr/^\s*([$chars][${chars}~ ]*)$/o; + # Barry Jaspan: allow ~ and spaces, good for Windows. + # Also return '' if input is '', as it is a safe path. + # Bug 7264: allow also parenthesis, e.g. 
"C:\Program Files (x86)" + my $chars = '-_A-Za-z0-9.%=+,/:()\\@\\xA0-\\xFF\\\\'; + my $re = qr{^\s*([$chars][${chars}~ ]*)\z}o; if ($path =~ $re) { $path = $1; @@ -338,6 +380,95 @@ sub taint_var { ########################################################################### +# returns true if the provided string of octets represents a syntactically +# valid UTF-8 string, otherwise a false is returned +# +sub is_valid_utf_8($) { +# my $octets = $_[0]; + return undef if !defined $_[0]; + # + # RFC 6532: UTF8-non-ascii = UTF8-2 / UTF8-3 / UTF8-4 + # RFC 3629 section 4: Syntax of UTF-8 Byte Sequences + # UTF8-char = UTF8-1 / UTF8-2 / UTF8-3 / UTF8-4 + # UTF8-1 = %x00-7F + # UTF8-2 = %xC2-DF UTF8-tail + # UTF8-3 = %xE0 %xA0-BF UTF8-tail / + # %xE1-EC 2( UTF8-tail ) / + # %xED %x80-9F UTF8-tail / + # # U+D800..U+DFFF are utf16 surrogates, not legal utf8 + # %xEE-EF 2( UTF8-tail ) + # UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / + # %xF1-F3 3( UTF8-tail ) / + # %xF4 %x80-8F 2( UTF8-tail ) + # UTF8-tail = %x80-BF + # + # loose variant: + # [\x00-\x7F] | [\xC0-\xDF][\x80-\xBF] | + # [\xE0-\xEF][\x80-\xBF]{2} | [\xF0-\xF4][\x80-\xBF]{3} + # + $_[0] =~ /^ (?: [\x00-\x7F] | + [\xC2-\xDF] [\x80-\xBF] | + \xE0 [\xA0-\xBF] [\x80-\xBF] | + [\xE1-\xEC] [\x80-\xBF]{2} | + \xED [\x80-\x9F] [\x80-\xBF] | + [\xEE-\xEF] [\x80-\xBF]{2} | + \xF0 [\x90-\xBF] [\x80-\xBF]{2} | + [\xF1-\xF3] [\x80-\xBF]{3} | + \xF4 [\x80-\x8F] [\x80-\xBF]{2} )* \z/xs ? 1 : 0; +} + +# Given an international domain name with U-labels (UTF-8 or Unicode chars) +# converts it to ASCII-compatible encoding (ACE). If the argument is in +# ASCII (or is an invalid IDN), returns it lowercased but otherwise unchanged. +# The result is always in octets (utf8 flag off) even if the argument was in +# Unicode characters. 
+# +sub idn_to_ascii($) { + no bytes; # make sure there is no 'use bytes' in effect + return undef if !defined $_[0]; + my $s = "$_[0]"; # stringify + # propagate taintedness of the argument, but not its utf8 flag + my $t = tainted($s); # taintedness of the argument + if ($t) { # untaint $s, avoids taint-related bugs in LibIDN or in old perl + no re 'taint'; local $1; $s =~ /^(.*)\z/s; + } + # encode chars to UTF-8, leave octets unchanged (not necessarily valid UTF-8) + utf8::encode($s) if utf8::is_utf8($s); + if ($s !~ tr/\x00-\x7F//c) { # is all-ASCII (including IP address literal) + $s = lc $s; + } elsif (!is_valid_utf_8($s)) { + my($package, $filename, $line) = caller; + info("util: idn_to_ascii: not valid UTF-8: /%s/, called from %s line %d", + $s, $package, $line); + $s = lc $s; # garbage-in / garbage-out + } else { # is valid UTF-8 but not all-ASCII + my $chars; + # RFC 3490 (IDNA): Whenever dots are used as label separators, the + # following characters MUST be recognized as dots: U+002E (full stop), + # U+3002 (ideographic full stop), U+FF0E (fullwidth full stop), + # U+FF61 (halfwidth ideographic full stop). + if ($s =~ s/$ALT_FULLSTOP_UTF8_RE/./gso) { + info("util: idn_to_ascii: alternative dots normalized: /%s/ -> /%s/", + $_[0], $s); + } + if (!$have_libidn) { + $s = lc $s; + } else { + # to ASCII-compatible encoding (ACE), lowercased + my $sa = Net::LibIDN::idn_to_ascii($s, 'UTF-8'); + if (!defined $sa) { + info("util: idn_to_ascii: conversion to ACE failed: /%s/", $s); + } else { + info("util: idn_to_ascii: converted to ACE: /%s/ -> /%s/", $s, $sa); + $s = $sa; + } + } + } + $t ? 
taint_var($s) : $s; # propagate taintedness of the argument +} + +########################################################################### + # map process termination status number to an informative string, and # append optional mesage (dual-valued errno or a string or a number), # returning the resulting string @@ -691,6 +822,7 @@ sub base64_decode { m|^(?:[A-Za-z0-9+/=]{2,}={0,2})$|s) { # only use MIME::Base64 when the XS and Perl are both correct and quiet + local $1; s/(=+)(?!=*$)/'A' x length($1)/ge; # If only a certain number of bytes are requested, truncate the encoded @@ -706,7 +838,7 @@ sub base64_decode { } tr{A-Za-z0-9+/=}{}cd; # remove non-base64 characters s/=+$//; # remove terminating padding - tr{A-Za-z0-9+/=}{ -_`}; # translate to uuencode + tr{A-Za-z0-9+/=}{ -_}; # translate to uuencode s/.$// if (length($_) % 4 == 1); # unpack cannot cope with extra byte my $length; @@ -727,26 +859,28 @@ sub base64_decode { } sub qp_decode { - local $_ = shift; + my $str = $_[0]; # RFC 2045: when decoding a Quoted-Printable body, any trailing # white space on a line must be deleted - s/[ \t]+(?=\r?\n)//gs; + $str =~ s/[ \t]+(?=\r?\n)//gs; - s/=\r?\n//gs; # soft line breaks + $str =~ s/=\r?\n//gs; # soft line breaks # RFC 2045 explicitly prohibits lowercase characters a-f in QP encoding # do we really want to allow them??? - s/=([0-9a-fA-F]{2})/chr(hex($1))/ge; - return $_; + local $1; + $str =~ s/=([0-9a-fA-F]{2})/chr(hex($1))/ge; + + return $str; } sub base64_encode { local $_ = shift; if (HAS_MIME_BASE64) { - return MIME::Base64::encode_base64($_); + return MIME::Base64::encode_base64($_,''); } $_ = pack("u57", $_); @@ -805,10 +939,10 @@ sub extract_ipv4_addr_from_string { return unless defined($str); if ($str =~ /\b( - (?:1\d\d|2[0-4]\d|25[0-5]|\d\d|\d)\. - (?:1\d\d|2[0-4]\d|25[0-5]|\d\d|\d)\. - (?:1\d\d|2[0-4]\d|25[0-5]|\d\d|\d)\. - (?:1\d\d|2[0-4]\d|25[0-5]|\d\d|\d) + (?:1\d\d|2[0-4]\d|25[0-5]|[1-9]\d|\d)\. + (?:1\d\d|2[0-4]\d|25[0-5]|[1-9]\d|\d)\. 
+ (?:1\d\d|2[0-4]\d|25[0-5]|[1-9]\d|\d)\. + (?:1\d\d|2[0-4]\d|25[0-5]|[1-9]\d|\d) )\b/ix) { if (defined $1) { return $1; } @@ -941,8 +1075,8 @@ sub decode_dns_question_entry { local $1; # Net::DNS provides a query in encoded RFC 1035 zone file format, decode it! - $qname =~ s{ \\ ( [0-9]{3} | [^0-9] ) } - { length($1)==1 ? $1 : $1 <= 255 ? chr($1) : "\\$1" }xgse; + $qname =~ s{ \\ ( [0-9]{3} | (?![0-9]{3}) . ) } + { length($1)==3 && $1 <= 255 ? chr($1) : $1 }xgse; return ($q->qclass, $q->qtype, $qname); } @@ -1204,45 +1338,9 @@ sub secure_tmpdir { ########################################################################### ## -## DEPRECATED FUNCTION, only left for third party plugins as fallback. +## DEPRECATED FUNCTION, sub uri_to_domain removed. ## Replaced with Mail::SpamAssassin::RegistryBoundaries::uri_to_domain. ## -sub uri_to_domain { - my ($uri) = @_; - - # Javascript is not going to help us, so return. - return if ($uri =~ /^javascript:/i); - - $uri =~ s{\#.*$}{}gs; # drop fragment - $uri =~ s{^[a-z]+:/{0,2}}{}gsi; # drop the protocol - $uri =~ s{^[^/]*\@}{}gs; # username/passwd - - # strip path and CGI params. note: bug 4213 shows that "&" should - # *not* be likewise stripped here -- it's permitted in hostnames by - # some common MUAs! - $uri =~ s{[/?].*$}{}gs; - - $uri =~ s{:\d*$}{}gs; # port, bug 4191: sometimes the # is missing - - # skip undecoded URIs if the encoded bits shouldn't be. - # we'll see the decoded version as well. 
see url_encode() - return if $uri =~ /\%(?:2[1-9a-fA-F]|[3-6][0-9a-fA-F]|7[0-9a-eA-E])/; - - my $host = $uri; # unstripped/full domain name - - # keep IPs intact - if ($uri !~ /^\d+\.\d+\.\d+\.\d+$/) { - # get rid of hostname part of domain, understanding delegation - $uri = Mail::SpamAssassin::Util::RegistrarBoundaries::trim_domain($uri); - - # ignore invalid domains - return unless - (Mail::SpamAssassin::Util::RegistrarBoundaries::is_domain_valid($uri)); - } - - # $uri is now the domain only, optionally return unstripped host name - return !wantarray ? lc $uri : (lc $uri, lc $host); -} *uri_list_canonify = \&uri_list_canonicalize; # compatibility alias sub uri_list_canonicalize { @@ -1314,20 +1412,10 @@ sub uri_list_canonicalize { # not required $rest ||= ''; - # Bug 6751: - # RFC 3490 (IDNA): Whenever dots are used as label separators, the - # following characters MUST be recognized as dots: U+002E (full stop), - # U+3002 (ideographic full stop), U+FF0E (fullwidth full stop), - # U+FF61 (halfwidth ideographic full stop). - # RFC 5895: [...] the IDEOGRAPHIC FULL STOP character (U+3002) - # can be mapped to the FULL STOP before label separation occurs. - # [...] Only the IDEOGRAPHIC FULL STOP character (U+3002) is added in - # this mapping because the authors have not fully investigated [...] - # Adding also 'SMALL FULL STOP' (U+FE52) as seen in the wild. - # Parhaps also the 'ONE DOT LEADER' (U+2024). - if ($host =~ s{(?: \xE3\x80\x82 | \xEF\xBC\x8E | \xEF\xBD\xA1 | - \xEF\xB9\x92 | \xE2\x80\xA4 )}{.}xgs) { - push(@nuris, join ('', $proto, $host, $rest)); + my $nhost = idn_to_ascii($host); + if (defined $nhost && $nhost ne lc $host) { + push(@nuris, join('', $proto, $nhost, $rest)); + $host = $nhost; } # bug 4146: deal with non-US ASCII 7-bit chars in the host portion @@ -1340,23 +1428,27 @@ sub uri_list_canonicalize { # deal with http redirectors. strip off one level of redirector # and add back to the array. 
the foreach loop will go over those # and deal appropriately. - # bug 3308: redirectors like yahoo only need one '/' ... - if ($rest =~ m{(https?:/{0,2}.+)$}i) { - push(@uris, $1); - } - # resort to redirector pattern matching if the generic https? check - # doesn't result in a match -- bug 4176 - else { - foreach (@{$redirector_patterns}) { - if ("$proto$host$rest" =~ $_) { - next unless defined $1; - dbg("uri: parsed uri pattern: $_"); - dbg("uri: parsed uri found: $1 in redirector: $proto$host$rest"); - push (@uris, $1); - last; - } - } + # Bug 7278: try redirector pattern matching first + # (but see also Bug 4176) + my $found_redirector_match; + foreach my $re (@{$redirector_patterns}) { + if ("$proto$host$rest" =~ $re) { + next unless defined $1; + dbg("uri: parsed uri pattern: $re"); + dbg("uri: parsed uri found: $1 in redirector: $proto$host$rest"); + push (@uris, $1); + $found_redirector_match = 1; + last; + } + } + if (!$found_redirector_match) { + # try generic https? check if redirector pattern matching failed + # bug 3308: redirectors like yahoo only need one '/' ... + if ($rest =~ m{(https?:/{0,2}.+)$}i) { + push(@uris, $1); + dbg("uri: parsed uri found: $1 in hard-coded redirector"); + } } ######################## diff --git lib/Mail/SpamAssassin/Util/DependencyInfo.pm lib/Mail/SpamAssassin/Util/DependencyInfo.pm index 8127595bf..893b59da1 100644 --- lib/Mail/SpamAssassin/Util/DependencyInfo.pm +++ lib/Mail/SpamAssassin/Util/DependencyInfo.pm @@ -31,23 +31,15 @@ package Mail::SpamAssassin::Util::DependencyInfo; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; use vars qw ( @MODULES @OPTIONAL_MODULES $EXIT_STATUS $WARNINGS @OPTIONAL_BINARIES @BINARIES ); -my $have_sha = eval { require Digest::SHA }; -my $have_sha1 = eval { require Digest::SHA1 }; - @MODULES = ( -$have_sha1 ? { - 'module' => 'Digest::SHA1', - 'version' => 0, - 'desc' => 'The Digest::SHA1 module is used as a cryptographic hash for some - tests and the Bayes subsystem. 
It is also required by the Razor2 plugin.', -} : { +{ 'module' => 'Digest::SHA', 'version' => 0, 'desc' => 'The Digest::SHA module is used as a cryptographic hash for some @@ -103,14 +95,11 @@ $have_sha1 ? { ); my @OPTIONAL_MODULES = ( -$have_sha ? { +{ 'module' => 'Digest::SHA1', 'version' => 0, - 'desc' => 'The Digest::SHA1 module is required by the Razor2 plugin.', -} : { - 'module' => 'Digest::SHA', - 'version' => 0, - 'desc' => 'The Digest::SHA module is required by the DKIM plugin.', + 'desc' => 'The Digest::SHA1 module is still required by the Razor2 plugin. + Other modules prefer Digest::SHA, which is a Perl base module.', }, { module => 'MIME::Base64', @@ -132,6 +121,14 @@ $have_sha ? { desc => 'Used when manually reporting spam to SpamCop with "spamassassin -r".', }, { + 'module' => 'Net::LibIDN', + 'version' => 0, + 'desc' => "Provides mapping between Internationalized Domain Names (IDN) in + Unicode and ASCII-compatible encoding (ACE) for use in DNS and comparisions. + The module is optional, but without it Unicode IDN names found in mail will + not be suitable for DNS queries and white/blacklisting.", +}, +{ module => 'Mail::SPF', version => 0, desc => 'Used to check DNS Sender Policy Framework (SPF) records to fight email @@ -315,6 +312,13 @@ my @OPTIONAL_BINARIES = ( version => '0', desc => $lwp_note, +}, +{ + binary => 're2c', + version => '0', + + desc => 'The "re2c" program used by sa-compile to compile rules + for regular expressions to speed up scanning.', } ); diff --git lib/Mail/SpamAssassin/Util/MemoryDump.pm lib/Mail/SpamAssassin/Util/MemoryDump.pm index 1f53cb127..25a62b48d 100644 --- lib/Mail/SpamAssassin/Util/MemoryDump.pm +++ lib/Mail/SpamAssassin/Util/MemoryDump.pm @@ -35,7 +35,7 @@ package Mail::SpamAssassin::Util::MemoryDump; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; BEGIN { diff --git lib/Mail/SpamAssassin/Util/Progress.pm lib/Mail/SpamAssassin/Util/Progress.pm index 6454ca0c3..0ebad33b5 100644 --- 
lib/Mail/SpamAssassin/Util/Progress.pm +++ lib/Mail/SpamAssassin/Util/Progress.pm @@ -44,7 +44,7 @@ package Mail::SpamAssassin::Util::Progress; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; use Time::HiRes qw(time); diff --git lib/Mail/SpamAssassin/Util/RegistrarBoundaries.pm lib/Mail/SpamAssassin/Util/RegistrarBoundaries.pm deleted file mode 100644 index 88094a9f7..000000000 --- lib/Mail/SpamAssassin/Util/RegistrarBoundaries.pm +++ /dev/null @@ -1,440 +0,0 @@ -# The (extremely complex) rules for domain delegation. -# Note that really, this should be called "RegistryBoundaries"; see bug 4605 - -# <@LICENSE> -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to you under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at: -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -=head1 NAME - -Mail::SpamAssassin::Util::RegistrarBoundaries - domain delegation rules - -This module is DEPRECATED AND REPLACED WITH -Mail::SpamAssassin::RegistryBoundaries !! - -DO NOT USE. This is left as transition fallback for third party plugins. - -It will be removed in the future but all functionality has been -transitioned to Mail::SpamAssassin::RegistryBoundaries and the TLD -updates via 20_aux_tlds.cf delivered via sa-update with version 3.4.1. 
- -=cut - -package Mail::SpamAssassin::Util::RegistrarBoundaries; - -use strict; -use warnings; -use bytes; -use re 'taint'; - -use vars qw ( - @ISA %TWO_LEVEL_DOMAINS %THREE_LEVEL_DOMAINS %US_STATES %VALID_TLDS $VALID_TLDS_RE -); - -# %VALID_TLDS -# The list of currently-valid TLDs for the DNS system. -# -# This list is deprecated and unmaintained. It will become increasingly -# out of date and will be removed in a future release. -# -# As of 3.4.1, updates will be done in rules/20_aux_tlds.cf -foreach (qw/abbott abogado ac academy accountant accountants active actor ad ads adult ae aero af afl ag agency ai airforce al allfinanz alsace am amsterdam an android ao apartments aq aquarelle ar archi army arpa as asia associates at attorney au auction audio autos aw ax axa az ba band bank bar barclaycard barclays bargains bayern bb bbc bd be beer berlin best bf bg bh bi bid bike bingo bio biz bj black blackfriday bloomberg blue bm bmw bn bnpparibas bo boats bond boo boutique br brussels bs bt budapest build builders business buzz bv bw by bz bzh ca cab cal camera camp cancerresearch canon capetown capital caravan cards care career careers cartier casa cash casino cat catering cbn cc cd center ceo cern cf cfd cg ch channel chat cheap chloe christmas chrome church ci citic city ck cl claims cleaning click clinic clothing club cm cn co coach codes coffee college cologne com community company computer condos construction consulting contractors cooking cool coop country courses cr credit creditcard cricket crs cruises cu cuisinella cv cw cx cy cymru cz dabur dad dance date dating datsun day dclk de deals degree delivery democrat dental dentist desi design dev diamonds diet digital direct directory discount dj dk dm dnp do docs doha domains doosan download durban dvag dz eat ec edu education ee eg email emerck energy engineer engineering enterprises epson equipment er erni es esq estate et eu eurovision eus events everbank exchange expert exposed fail faith fan fans farm 
fashion feedback fi film finance financial firmdale fish fishing fit fitness fj fk flights florist flowers flsmidth fly fm fo foo football forex forsale foundation fr frl frogans fund furniture futbol ga gal gallery garden gb gbiz gd gdn ge gent gf gg ggee gh gi gift gifts gives gl glass gle global globo gm gmail gmo gmx gn gold goldpoint golf goo goog google gop gov gp gq gr graphics gratis green gripe gs gt gu guge guide guitars guru gw gy hamburg hangout haus healthcare help here hermes hiphop hiv hk hm hn holdings holiday homes horse host hosting house how hr ht hu ibm id ie ifm il im immo immobilien in industries infiniti info ing ink institute insure int international investments io iq ir irish is it iwc java jcb je jetzt jm jo jobs joburg jp juegos kaufen kddi ke kg kh ki kim kitchen kiwi km kn koeln komatsu kp kr krd kred kw ky kyoto kz la lacaixa land lat latrobe lawyer lb lc lds lease leclerc legal lgbt li lidl life lighting limited limo link lk loan loans london lotte lotto lr ls lt ltda lu luxe luxury lv ly ma madrid maif maison management mango market marketing markets marriott mc md me media meet melbourne meme memorial menu mg mh miami mil mini mk ml mm mma mn mo mobi moda moe monash money mormon mortgage moscow motorcycles mov movie mp mq mr ms mt mtn mtpc mu museum mv mw mx my mz na nagoya name navy nc ne net network neustar new news nexus nf ng ngo nhk ni nico ninja nissan nl no np nr nra nrw ntt nu nyc nz okinawa om one ong onl online ooo oracle org organic osaka otsuka ovh pa page panerai paris partners parts party pe pf pg ph pharmacy photo photography photos physio piaget pics pictet pictures pink pizza pk pl place plumbing plus pm pn pohl poker porn post pr praxi press pro prod productions prof properties property ps pt pub pw py qa qpon quebec re realtor recipes red redstone rehab reise reisen reit ren rentals repair report republican rest restaurant review reviews rich rio rip ro rocks rodeo rs rsvp ru ruhr rw ryukyu sa saarland sale 
samsung sap sarl saxo sb sc sca scb schmidt school schule schwarz science scot sd se services sew sexy sg sh shiksha shoes shriram si singles site sj sk sky sl sm sn so social software sohu solar solutions soy space spiegel spreadbetting sr st study style su sucks supplies supply support surf surgery suzuki sv sx sy sydney systems sz taipei tatar tattoo tax tc td tech technology tel temasek tennis tf tg th tickets tienda tips tires tirol tj tk tl tm tn to today tokyo tools top toshiba tours town toys tr trade trading training travel trust tt tui tv tw tz ua ug uk university uno uol us uy uz va vacations vc ve vegas ventures versicherung vet vg vi viajes video villas vision vlaanderen vn vodka vote voting voto voyage vu wales wang watch webcam website wed wedding wf whoswho wien wiki williamhill win wme work works world ws wtc wtf xin xn--1qqw23a xn--30rr7y xn--3bst00m xn--3ds443g xn--3e0b707e xn--45brj9c xn--45q11c xn--4gbrim xn--55qw42g xn--55qx5d xn--6frz82g xn--6qq986b3xl xn--80adxhks xn--80ao21a xn--80asehdb xn--80aswg xn--90a3ac xn--90ais xn--9et52u xn--b4w605ferd xn--c1avg xn--cg4bki xn--clchc0ea0b2g2a9gcd xn--czr694b xn--czrs0t xn--czru2d xn--d1acj3b xn--d1alf xn--fiq228c5hs xn--fiq64b xn--fiqs8s xn--fiqz9s xn--flw351e xn--fpcrj9c3d xn--fzc2c9e2c xn--gecrj9c xn--h2brj9c xn--hxt814e xn--i1b6b1a6a2e xn--io0a7i xn--j1amh xn--j6w193g xn--kprw13d xn--kpry57d xn--kput3i xn--l1acc xn--lgbbat1ad8j xn--mgb9awbf xn--mgba3a4f16a xn--mgbaam7a8h xn--mgbab2bd xn--mgbayh7gpa xn--mgbbh1a71e xn--mgbc0a9azcg xn--mgberp4a5d4ar xn--mgbx4cd0ab xn--mxtq1m xn--ngbc5azd xn--node xn--nqv7f xn--nqv7fs00ema xn--o3cw4h xn--ogbpf8fl xn--p1acf xn--p1ai xn--pgbs0dh xn--q9jyb4c xn--qcka1pmc xn--rhqv96g xn--s9brj9c xn--ses554g xn--unup4y xn--vermgensberater-ctb xn--vermgensberatung-pwb xn--vhquv xn--vuq861b xn--wgbh1c xn--wgbl6a xn--xhq521b xn--xkc2al3hye2a xn--xkc2dl3a5ee0h xn--yfro4i67o xn--ygbi2ammx xn--zfr164b xxx xyz yachts yandex ye yodobashi yoga yokohama youtube yt za zip zm zone 
zuerich zw/) { - $VALID_TLDS{$_} = 1; -} - -# $VALID_TLDS_RE -# %VALID_TLDS as Regexp::List optimized regexp, for use in Plugins etc -# -# This regex is deprecated and unmaintained. It will become increasingly -# out of date and will be removed in a future release. -# -# As of 3.4.1, this regex is generated automatically in Conf.pm -$VALID_TLDS_RE = qr/(?:X(?:N--(?:M(?:GB(?:A(?:(?:3A4F16|YH7GP)A|AM7A8H|B2BD)|ERP4A5D4AR|C0A9AZCG|BH1A71E|X4CD0AB|9AWBF)|XTQ1M)|F(?:IQ(?:(?:228C5H|S8|Z9)S|64B)|PCRJ9C3D|ZC2C9E2C|LW351E)|C(?:ZR(?:694B|S0T|U2D)|LCHC0EA0B2G2A9GCD|G4BKI|1AVG)|V(?:(?:ERMGENSBERAT(?:UNG-PW|ER-CT)|UQ861)B|HQUV)|X(?:KC2(?:DL3A5EE0H|AL3HYE2A)|HQ521B)|3(?:E0B707E|BST00M|DS443G|0RR7Y)|N(?:QV7F(?:S00EMA)?|GBC5AZD|ODE)|80A(?:S(?:EHDB|WG)|DXHKS|O21A)|(?:Q(?:CKA1PM|9JYB4)|GECRJ9)C|4(?:5(?:BRJ9|Q11)C|GBRIM)|KP(?:R(?:W13|Y57)D|UT3I)|9(?:0A(?:3AC|IS)|ET52U)|P(?:1A(?:CF|I)|GBS0DH)|Y(?:FRO4I67O|GBI2AMMX)|6(?:QQ986B3XL|FRZ82G)|I(?:1B6B1A6A2E|O0A7I)|L(?:GBBAT1AD8J|1ACC)|H(?:2BRJ9C|XT814E)|O(?:GBPF8FL|3CW4H)|S(?:9BRJ9C|ES554G)|J(?:6W193G|1AMH)|55Q(?:W42G|X5D)|D1A(?:CJ3B|LF)|WGB(?:H1C|L6A)|B4W605FERD|1QQW23A|RHQV96G|ZFR164B|UNUP4Y)|IN|XX|YZ)|C(?:[CDGKMVWXZ]|O(?:N(?:S(?:TRUCTION|ULTING)|(?:TRACTOR|DO)S)|M(?:P(?:UTER|ANY)|MUNITY)?|(?:L(?:LEG|OGN)|FFE)E|O(?:[LP]|KING)|U(?:NTRY|RSES)|ACH|DES)?|A(?:[BL]|R(?:E(?:ERS?)?|AVAN|TIER|DS)|N(?:CERRESEARCH|ON)|P(?:ETOWN|ITAL)|S(?:[AH]|INO)|T(?:ERING)?|M(?:ERA|P))?|H(?:R(?:ISTMAS|OME)|A(?:NNEL|T)|URCH|EAP|LOE)?|L(?:(?:EAN|OTH)ING|I(?:NIC|CK)|AIMS|UB)?|R(?:EDIT(?:CARD)?|(?:UISE)?S|ICKET)?|I(?:T(?:IC|Y))?|E(?:NTER|RN|O)|U(?:ISINELLA)?|Y(?:MRU)?|B?N|FD?)|S(?:[BDGJLMNRVXZ]|U(?:PP(?:L(?:IES|Y)|ORT)|R(?:GERY|F)|ZUKI|CKS)?|C(?:[AB]|H(?:MIDT|WARZ|OOL|ULE)|IENCE|OT)?|O(?:L(?:UTIONS|AR)|FTWARE|CIAL|HU|Y)?|A(?:ARLAND|MSUNG|LE|RL|XO|P)?|P(?:READBETTING|IEGEL|ACE)|H(?:IKSHA|RIRAM|OES)?|E(?:RVICES|XY|W)?|Y(?:STEMS|DNEY)?|I(?:NGLES|TE)?|T(?:UDY|YLE)?|KY?)|A(?:[OWZ]|C(?:T(?:IVE|OR)|COUNTANTS?|ADEMY)?|U(?:CTION|DIO|TOS)?|L(?:LFINANZ|SACE)?|S(?:SOCIATES|IA)?|B(
?:OGADO|BOTT)|R(?:CHI|MY|PA)?|(?:MSTERDA)?M|Q(?:UARELLE)?|I(?:RFORCE)?|T(?:TORNEY)?|D(?:ULT|S)?|N(?:DROID)?|G(?:ENCY)?|PARTMENTS|E(?:RO)?|FL?|XA?)|M(?:[CDGHKLNPQRSVWXYZ]|O(?:R(?:TGAGE|MON)|N(?:ASH|EY)|TORCYCLES|V(?:IE)?|SCOW|BI|DA|E)?|A(?:R(?:KET(?:ING|S)?|RIOTT)|N(?:AGEMENT|GO)|I(?:SON|F)|DRID)?|E(?:M(?:ORIAL|E)|LBOURNE|DIA|ET|NU)?|I(?:(?:AM|N)I|L)|T(?:PC|N)?|U(?:SEUM)?|MA?)|B(?:[DFGHJSTVWY]|A(?:R(?:CLAY(?:CARD|S)|GAINS)?|N[DK]|YERN)?|U(?:ILD(?:ERS)?|DAPEST|SINESS|ZZ)|L(?:ACK(?:FRIDAY)?|OOMBERG|UE)|I(?:[DZ]|(?:NG)?O|KE)?|O(?:UTIQUE|ATS|ND|O)?|E(?:RLIN|ER|ST)?|N(?:PPARIBAS)?|R(?:USSELS)?|BC?|MW?|ZH?)|P(?:[EFGKMNSTWY]|R(?:O(?:D(?:UCTIONS)?|PERT(?:IES|Y)|F)?|AXI|ESS)?|A(?:R(?:T(?:(?:NER)?S|Y)|IS)|NERAI|GE)?|I(?:C(?:T(?:URES|ET)|S)|AGET|ZZA|NK)|H(?:OTO(?:GRAPHY|S)?|ARMACY|YSIO)?|L(?:U(?:MBING|S)|ACE)?|O(?:KER|HL|RN|ST)|UB)|G(?:[FHNPQSTWY]|O(?:[PV]|L(?:D(?:POINT)?|F)|O(?:G(?:LE)?)?)|R(?:A(?:PHIC|TI)S|EEN|IPE)?|U(?:I(?:TARS|DE)|GE|RU)?|L(?:OB(?:AL|O)|ASS|E)?|A(?:L(?:LERY)?|RDEN)?|I(?:FTS?|VES)?|M(?:[OX]|AIL)?|B(?:IZ)?|E(?:NT)?|G(?:EE)?|DN?)|F(?:[JM]|I(?:NANC(?:IAL|E)|SH(?:ING)?|T(?:NESS)?|RMDALE|LM)?|O(?:R(?:SALE|EX)|O(?:TBALL)?|UNDATION)?|L(?:O(?:RIST|WERS)|SMIDTH|IGHTS|Y)|A(?:I(?:TH|L)|SHION|NS?|RM)|U(?:RNITURE|TBOL|ND)|R(?:OGANS|L)?|(?:EEDBAC)?K)|D(?:[JMZ]|E(?:NT(?:IST|AL)|SI(?:GN)?|LIVERY|MOCRAT|GREE|ALS|V)?|I(?:(?:SCOUN|E)T|RECT(?:ORY)?|AMONDS|GITAL)|A(?:[DY]|T(?:ING|SUN|E)|BUR|NCE)|O(?:(?:MAIN|C)S|WNLOAD|OSAN|HA)?|(?:CL)?K|URBAN|VAG|NP)|T(?:[CDFGHJKLMNTVWZ]|O(?:(?:OL|UR|Y)S|SHIBA|DAY|KYO|WN|P)?|R(?:A(?:D(?:ING|E)|INING|VEL)|UST)?|I(?:(?:CKET|P)S|R(?:ES|OL)|ENDA)|E(?:CH(?:NOLOGY)?|MASEK|NNIS|L)|A(?:T(?:TOO|AR)|IPEI|X)|UI)|E(?:[CEG]|N(?:GINEER(?:ING)?|TERPRISES|ERGY)|X(?:P(?:OSED|ERT)|CHANGE)|U(?:ROVISION|S)?|(?:QUIPMEN|A)?T|VE(?:RBANK|NTS)|DU(?:CATION)?|M(?:ERCK|AIL)|S(?:TATE|Q)?|R(?:NI)?|PSON)|R(?:E(?:P(?:UBLICAN|AIR|ORT)|S(?:TAURAN)?T|D(?:STONE)?|I(?:SEN?|T)|N(?:TALS)?|VIEWS?|ALTOR|CIPES|HAB)?|O(?:CKS|DEO)?|I(?:[OP]|CH)|S(?:VP)?|U(?:HR)?|YUKYU|W)|L(?:[BCKRVY]|I(?:
M(?:ITED|O)|GHTING|DL|FE|NK)?|A(?:T(?:ROBE)?|CAIXA|WYER|ND)?|O(?:TT[EO]|ANS?|NDON)|E(?:CLERC|ASE|GAL)|U(?:X(?:URY|E))?|T(?:DA)?|D?S|GBT)|I(?:[DELOQST]|N(?:[GK]|(?:VESTMENT|DUSTRIE)S|T(?:ERNATIONAL)?|S(?:TITUT|UR)E|F(?:INITI|O))?|M(?:MO(?:BILIEN)?)?|R(?:ISH)?|[BF]M|WC)|V(?:[CGU]|E(?:(?:NTURE|GA)S|RSICHERUNG|T)?|I(?:(?:AJE|LLA)S|SION|DEO)?|O(?:T(?:[EO]|ING)|YAGE|DKA)|(?:LAANDERE)?N|A(?:CATIONS)?)|H(?:[KMNRTU]|O(?:L(?:DINGS|IDAY)|ST(?:ING)?|[RU]SE|MES|W)|E(?:R(?:MES|E)|ALTHCARE|LP)|A(?:MBURG|NGOUT|US)|I(?:PHOP|V))|W(?:[FS]|E(?:B(?:SITE|CAM)|D(?:DING)?)|I(?:LLIAMHILL|E?N|KI)|A(?:LES|TCH|NG)|OR(?:KS?|LD)|HOSWHO|T[CF]|ME)|N(?:[FLOPUZ]|E(?:T(?:WORK)?|USTAR|WS?|XUS)?|I(?:SSAN|NJA|CO)?|A(?:GOYA|ME|VY)?|R[AW]?|GO?|Y?C|HK|TT)|K(?:[EGHMPWZ]|I(?:TCHEN|WI|M)?|O(?:MATSU|ELN)|(?:AUFE)?N|R(?:E?D)?|Y(?:OTO)?|DDI)|O(?:(?:(?:TSU|SA)K|KINAW)A|R(?:G(?:ANIC)?|ACLE)|N(?:[EG]|L(?:INE)?)|OO|VH|M)|Y(?:[ET]|O(?:(?:KOHAM|G)A|DOBASHI|UTUBE)|A(?:CHTS|NDEX))|J(?:[MP]|O(?:B(?:URG|S))?|E(?:TZT)?|UEGOS|AVA|CB)|U(?:[AGKSYZ]|N(?:IVERSITY|O)|OL)|Z(?:[AMW]|UERICH|ONE|IP)|Q(?:UEBEC|PON|A))/ix; - -# Two-Level TLDs -# -# to resort this, pump the whole list through: -# perl -e '$/=undef; $_=<>; foreach(split) { ($a,$b) = split(/\./, $_, 2); $t{$b}->{$_}=1; } foreach (sort keys %t) { print " ",join(" ", sort keys %{$t{$_}}),"\n" }' -# -# http://www.neustar.us/policies/docs/rfc_1480.txt -# data originally from http://spamcheck.freeapp.net/two-level-tlds -# The freeapp.net site now says that information on the site is obsolete -# See discussion and sources in comments of bug 5677 -# updated as per bug 5815 -# cleanup in progress per bug 6795 (axb) -# Unsorted sources: -# .ua : http://hostmaster.ua -# .hu : http://www.domain.hu/domain/English/szabalyzat/sld.html -# -# This list is deprecated and unmaintained. It will become increasingly -# out of date and will be removed in a future release. 
-# -# As of 3.4.1, updates will be done in rules/20_aux_tlds.cf -# -foreach(qw/ - - com.ac edu.ac gov.ac mil.ac net.ac org.ac - nom.ad - ac.ae co.ae com.ae gov.ae mil.ae name.ae net.ae org.ae pro.ae sch.ae - com.af edu.af gov.af net.af - co.ag com.ag net.ag nom.ag org.ag - com.ai edu.ai gov.ai net.ai off.ai org.ai - com.al edu.al gov.al net.al org.al - com.an edu.an net.an org.an - co.ao ed.ao gv.ao it.ao og.ao pb.ao - com.ar edu.ar gov.ar int.ar mil.ar net.ar org.ar - e164.arpa in-addr.arpa ip6.arpa iris.arpa uri.arpa urn.arpa - ac.at co.at gv.at or.at priv.at - act.au asn.au com.au conf.au csiro.au edu.au gov.au id.au info.au net.au nsw.au nt.au org.au otc.au oz.au qld.au sa.au tas.au telememo.au vic.au wa.au - com.aw - biz.az com.az edu.az gov.az info.az int.az mil.az name.az net.az org.az pp.az - co.ba com.ba edu.ba gov.ba mil.ba net.ba org.ba rs.ba unbi.ba unsa.ba - com.bb edu.bb gov.bb net.bb org.bb - ac.bd com.bd edu.bd gov.bd mil.bd net.bd org.bd - ac.be belgie.be dns.be fgov.be - gov.bf - biz.bh cc.bh com.bh edu.bh gov.bh info.bh net.bh org.bh - com.bm edu.bm gov.bm net.bm org.bm - com.bn edu.bn net.bn org.bn - com.bo edu.bo gob.bo gov.bo int.bo mil.bo net.bo org.bo tv.bo - adm.br adv.br agr.br am.br arq.br art.br ato.br bio.br bmd.br cim.br cng.br cnt.br com.br coop.br dpn.br eco.br ecn.br edu.br eng.br esp.br etc.br eti.br far.br fm.br fnd.br fot.br fst.br g12.br ggf.br gov.br imb.br ind.br inf.br jor.br lel.br mat.br med.br mil.br mus.br net.br nom.br not.br ntr.br odo.br org.br ppg.br pro.br psc.br psi.br qsl.br rec.br slg.br srv.br tmp.br trd.br tur.br tv.br vet.br zlg.br - com.bs net.bs org.bs - com.bt edu.bt gov.bt net.bt org.bt - co.bw org.bw - gov.by mil.by - com.bz net.bz org.bz - ab.ca bc.ca gc.ca mb.ca nb.ca nf.ca nl.ca ns.ca nt.ca nu.ca on.ca pe.ca qc.ca sk.ca yk.ca - co.ck edu.ck gov.ck net.ck org.ck - ac.cn ah.cn bj.cn com.cn cq.cn edu.cn fj.cn gd.cn gov.cn gs.cn gx.cn gz.cn ha.cn hb.cn he.cn hi.cn hk.cn hl.cn hn.cn jl.cn js.cn jx.cn ln.cn 
mo.cn net.cn nm.cn nx.cn org.cn qh.cn sc.cn sd.cn sh.cn sn.cn sx.cn tj.cn tw.cn xj.cn xz.cn yn.cn zj.cn - arts.co com.co edu.co firm.co gov.co info.co int.co mil.co net.co nom.co org.co rec.co web.co - lkd.co.im ltd.co.im plc.co.im - co.cm com.cm net.cm - au.com br.com cn.com de.com eu.com gb.com hu.com no.com qc.com ru.com sa.com se.com uk.com us.com uy.com za.com - ac.cr co.cr ed.cr fi.cr go.cr or.cr sa.cr - com.cu edu.cu gov.cu inf.cu net.cu org.cu - gov.cx - ac.cy biz.cy com.cy ekloges.cy gov.cy ltd.cy name.cy net.cy org.cy parliament.cy press.cy pro.cy tm.cy - co.dk - com.dm edu.dm gov.dm net.dm org.dm - art.do com.do edu.do gob.do gov.do mil.do net.do org.do sld.do web.do - art.dz asso.dz com.dz edu.dz gov.dz net.dz org.dz pol.dz - com.ec edu.ec fin.ec gov.ec info.ec k12.ec med.ec mil.ec net.ec org.ec pro.ec gob.ec - co.ee com.ee edu.ee fie.ee med.ee org.ee pri.ee - com.eg edu.eg eun.eg gov.eg mil.eg net.eg org.eg sci.eg - com.er edu.er gov.er ind.er mil.er net.er org.er - com.es edu.es gob.es nom.es org.es - biz.et com.et edu.et gov.et info.et name.et net.et org.et - aland.fi - ac.fj biz.fj com.fj gov.fj id.fj info.fj mil.fj name.fj net.fj org.fj pro.fj school.fj - ac.fk co.fk com.fk gov.fk net.fk nom.fk org.fk - tm.fr asso.fr nom.fr prd.fr presse.fr com.fr gouv.fr - com.ge edu.ge gov.ge mil.ge net.ge org.ge pvt.ge - ac.gg alderney.gg co.gg gov.gg guernsey.gg ind.gg ltd.gg net.gg org.gg sark.gg sch.gg - com.gh edu.gh gov.gh mil.gh org.gh - com.gi edu.gi gov.gi ltd.gi mod.gi org.gi - ac.gn com.gn gov.gn net.gn org.gn - asso.gp com.gp edu.gp net.gp org.gp - com.gr edu.gr gov.gr net.gr org.gr - com.gt edu.gt gob.gt ind.gt mil.gt net.gt org.gt - com.gu edu.gu gov.gu mil.gu net.gu org.gu - com.hk edu.hk gov.hk idv.hk net.hk org.hk - com.hn edu.hn gob.hn mil.hn net.hn org.hn - com.hr from.hr iz.hr name.hr - adult.ht art.ht asso.ht com.ht coop.ht edu.ht firm.ht gouv.ht info.ht med.ht net.ht org.ht perso.ht pol.ht pro.ht rel.ht shop.ht - 2000.hu agrar.hu bolt.hu 
casino.hu city.hu co.hu erotica.hu erotika.hu film.hu forum.hu games.hu hotel.hu info.hu ingatlan.hu jogasz.hu konyvelo.hu lakas.hu media.hu news.hu org.hu priv.hu reklam.hu sex.hu shop.hu sport.hu suli.hu szex.hu tm.hu tozsde.hu utazas.hu video.hu - ac.id co.id go.id mil.id net.id or.id sch.id web.id - gov.ie - ac.il co.il gov.il idf.il k12.il muni.il net.il org.il - ac.im co.im gov.im net.im nic.im org.im - ac.in co.in edu.in ernet.in firm.in gen.in gov.in ind.in mil.in net.in nic.in org.in res.in - com.io gov.io mil.io net.io org.io - ac.ir co.ir gov.ir id.ir net.ir org.ir sch.ir - edu.it gov.it - ac.je co.je gov.je ind.je jersey.je ltd.je net.je org.je sch.je - com.jm edu.jm gov.jm net.jm org.jm - com.jo edu.jo gov.jo mil.jo net.jo org.jo - ac.jp ad.jp aichi.jp akita.jp aomori.jp chiba.jp co.jp ed.jp ehime.jp fukui.jp fukuoka.jp fukushima.jp gifu.jp go.jp gov.jp gr.jp gunma.jp hiroshima.jp hokkaido.jp hyogo.jp ibaraki.jp ishikawa.jp iwate.jp kagawa.jp kagoshima.jp kanagawa.jp kanazawa.jp kawasaki.jp kitakyushu.jp kobe.jp kochi.jp kumamoto.jp kyoto.jp lg.jp matsuyama.jp mie.jp miyagi.jp miyazaki.jp nagano.jp nagasaki.jp nagoya.jp nara.jp ne.jp net.jp niigata.jp oita.jp okayama.jp okinawa.jp or.jp org.jp osaka.jp saga.jp saitama.jp sapporo.jp sendai.jp shiga.jp shimane.jp shizuoka.jp takamatsu.jp tochigi.jp tokushima.jp tokyo.jp tottori.jp toyama.jp utsunomiya.jp wakayama.jp yamagata.jp yamaguchi.jp yamanashi.jp yokohama.jp - ac.ke co.ke go.ke ne.ke new.ke or.ke sc.ke - com.kg edu.kg gov.kg mil.kg net.kg org.kg - com.kh edu.kh gov.kh mil.kh net.kh org.kh per.kh - ac.kr busan.kr chungbuk.kr chungnam.kr co.kr daegu.kr daejeon.kr es.kr gangwon.kr go.kr gwangju.kr gyeongbuk.kr gyeonggi.kr gyeongnam.kr hs.kr incheon.kr jeju.kr jeonbuk.kr jeonnam.kr kg.kr kyonggi.kr mil.kr ms.kr ne.kr or.kr pe.kr re.kr sc.kr seoul.kr ulsan.kr - com.kw edu.kw gov.kw mil.kw net.kw org.kw - com.ky edu.ky gov.ky net.ky org.ky - com.kz edu.kz gov.kz mil.kz net.kz org.kz - com.la net.la 
org.la - com.lb edu.lb gov.lb mil.lb net.lb org.lb - com.lc edu.lc gov.lc net.lc org.lc - assn.lk com.lk edu.lk gov.lk grp.lk hotel.lk int.lk ltd.lk net.lk ngo.lk org.lk sch.lk soc.lk web.lk - com.lr edu.lr gov.lr net.lr org.lr - co.ls org.ls - gov.lt mil.lt - asn.lv com.lv conf.lv edu.lv gov.lv id.lv mil.lv net.lv org.lv - biz.ly com.ly edu.ly gov.ly id.ly med.ly net.ly org.ly plc.ly sch.ly - ac.ma co.ma gov.ma net.ma org.ma press.ma - asso.mc tm.mc - ac.me co.me edu.me gov.me its.me net.me org.me priv.me - com.mg edu.mg gov.mg mil.mg nom.mg org.mg prd.mg tm.mg - army.mil navy.mil - com.mk org.mk - com.mm edu.mm gov.mm net.mm org.mm - edu.mn gov.mn org.mn - com.mo edu.mo gov.mo net.mo org.mo - music.mobi weather.mobi - co.mp edu.mp gov.mp net.mp org.mp - com.mt edu.mt gov.mt net.mt org.mt tm.mt uu.mt - co.mu com.mu - aero.mv biz.mv com.mv coop.mv edu.mv gov.mv info.mv int.mv mil.mv museum.mv name.mv net.mv org.mv pro.mv - ac.mw co.mw com.mw coop.mw edu.mw gov.mw int.mw museum.mw net.mw org.mw - com.mx edu.mx gob.mx net.mx org.mx - com.my edu.my gov.my mil.my name.my net.my org.my - alt.na com.na cul.na edu.na net.na org.na telecom.na unam.na - com.nc net.nc org.nc - de.net gb.net uk.net - ac.ng com.ng edu.ng gov.ng net.ng org.ng sch.ng - ac.ni biz.ni com.ni edu.ni gob.ni in.ni info.ni int.ni mil.ni net.ni nom.ni org.ni web.ni - fhs.no folkebibl.no fylkesbibl.no herad.no idrett.no kommune.no mil.no museum.no priv.no stat.no tel.no vgs.no - com.np edu.np gov.np mil.np net.np org.np - biz.nr co.nr com.nr edu.nr fax.nr gov.nr info.nr mob.nr mobil.nr mobile.nr net.nr org.nr tel.nr tlf.nr - ac.nz co.nz cri.nz geek.nz gen.nz govt.nz iwi.nz maori.nz mil.nz net.nz org.nz school.nz - ac.om biz.om co.om com.om edu.om gov.om med.om mil.om mod.om museum.om net.om org.om pro.om sch.om - dk.org eu.org - abo.pa ac.pa com.pa edu.pa gob.pa ing.pa med.pa net.pa nom.pa org.pa sld.pa - com.pe edu.pe gob.pe mil.pe net.pe nom.pe org.pe - com.pf edu.pf org.pf - ac.pg com.pg net.pg - 
com.ph edu.ph gov.ph mil.ph net.ph ngo.ph org.ph - biz.pk com.pk edu.pk fam.pk gob.pk gok.pk gon.pk gop.pk gos.pk gov.pk net.pk org.pk web.pk - art.pl biz.pl com.pl edu.pl gov.pl info.pl mil.pl net.pl ngo.pl org.pl - biz.pr com.pr edu.pr gov.pr info.pr isla.pr name.pr net.pr org.pr pro.pr - cpa.pro law.pro med.pro - com.ps edu.ps gov.ps net.ps org.ps plo.ps sec.ps - com.pt edu.pt gov.pt int.pt net.pt nome.pt org.pt publ.pt - com.py edu.py gov.py net.py org.py - com.qa edu.qa gov.qa net.qa org.qa - asso.re com.re nom.re - arts.ro com.ro firm.ro info.ro nom.ro nt.ro org.ro rec.ro store.ro tm.ro www.ro - ac.rs co.rs edu.rs gov.rs in.rs org.rs - ac.ru com.ru edu.ru gov.ru int.ru mil.ru net.ru org.ru pp.ru - ac.rw co.rw com.rw edu.rw gouv.rw gov.rw int.rw mil.rw net.rw - com.sa edu.sa gov.sa med.sa net.sa org.sa pub.sa sch.sa - com.sb edu.sb gov.sb net.sb org.sb - com.sc edu.sc gov.sc net.sc org.sc - com.sd edu.sd gov.sd info.sd med.sd net.sd org.sd sch.sd tv.sd - ab.se ac.se bd.se brand.se c.se d.se e.se f.se fh.se fhsk.se fhv.se g.se h.se i.se k.se komforb.se kommunalforbund.se komvux.se lanarb.se lanbib.se m.se mil.se n.se naturbruksgymn.se o.se org.se parti.se pp.se press.se s.se sshn.se t.se tm.se u.se w.se x.se y.se z.se - com.sg edu.sg gov.sg idn.sg net.sg org.sg per.sg - com.sh edu.sh gov.sh mil.sh net.sh org.sh - edu.sk gov.sk mil.sk - co.st com.st consulado.st edu.st embaixada.st gov.st mil.st net.st org.st principe.st saotome.st store.st - com.sv edu.sv gob.sv org.sv red.sv - com.sy gov.sy net.sy org.sy - at.tf bg.tf ca.tf ch.tf cz.tf de.tf edu.tf eu.tf int.tf net.tf pl.tf ru.tf sg.tf us.tf - ac.th co.th go.th in.th mi.th net.th or.th - ac.tj biz.tj co.tj com.tj edu.tj go.tj gov.tj int.tj mil.tj name.tj net.tj org.tj web.tj - com.tn edunet.tn ens.tn fin.tn gov.tn ind.tn info.tn intl.tn nat.tn net.tn org.tn rnrt.tn rns.tn rnu.tn tourism.tn - gov.to - gov.tp - av.tr bbs.tr bel.tr biz.tr com.tr dr.tr edu.tr gen.tr gov.tr info.tr k12.tr mil.tr name.tr net.tr 
org.tr pol.tr tel.tr web.tr - aero.tt at.tt au.tt be.tt biz.tt ca.tt co.tt com.tt coop.tt de.tt dk.tt edu.tt es.tt eu.tt fr.tt gov.tt info.tt int.tt it.tt jobs.tt mobi.tt museum.tt name.tt net.tt nic.tt org.tt pro.tt se.tt travel.tt uk.tt us.tt - co.tv gov.tv - club.tw com.tw ebiz.tw edu.tw game.tw gov.tw idv.tw mil.tw net.tw org.tw - ac.tz co.tz go.tz ne.tz or.tz - cherkassy.ua chernigov.ua chernovtsy.ua ck.ua cn.ua co.ua com.ua crimea.ua cv.ua dn.ua dnepropetrovsk.ua donetsk.ua dp.ua edu.ua gov.ua if.ua in.ua ivano-frankivsk.ua kh.ua kharkov.ua kherson.ua khmelnitskiy.ua kiev.ua kirovograd.ua km.ua kr.ua ks.ua kv.ua lg.ua lugansk.ua lutsk.ua lviv.ua mk.ua net.ua nikolaev.ua od.ua odessa.ua org.ua pl.ua poltava.ua rovno.ua rv.ua sebastopol.ua sumy.ua te.ua ternopil.ua uzhgorod.ua vinnica.ua vn.ua zaporizhzhe.ua zhitomir.ua zp.ua zt.ua - ac.ug co.ug go.ug ne.ug or.ug sc.ug - ac.uk bl.uk british-library.uk co.uk edu.uk gov.uk icnet.uk jet.uk ltd.uk me.uk mod.uk national-library-scotland.uk net.uk nhs.uk nic.uk nls.uk org.uk parliament.uk plc.uk police.uk sch.uk - ak.us al.us ar.us az.us ca.us co.us ct.us dc.us de.us dni.us fed.us fl.us ga.us hi.us ia.us id.us il.us in.us isa.us kids.us ks.us ky.us la.us ma.us md.us me.us mi.us mn.us mo.us ms.us mt.us nc.us nd.us ne.us nh.us nj.us nm.us nsn.us nv.us ny.us oh.us ok.us or.us pa.us ri.us sc.us sd.us tn.us tx.us ut.us va.us vt.us wa.us wi.us wv.us wy.us - com.uy edu.uy gub.uy mil.uy net.uy org.uy - vatican.va - arts.ve bib.ve co.ve com.ve edu.ve firm.ve gov.ve info.ve int.ve mil.ve net.ve nom.ve org.ve rec.ve store.ve tec.ve web.ve - co.vi com.vi edu.vi gov.vi net.vi org.vi - ac.vn biz.vn com.vn edu.vn gov.vn health.vn info.vn int.vn name.vn net.vn org.vn pro.vn - ch.vu com.vu de.vu edu.vu fr.vu net.vu org.vu - com.ws edu.ws gov.ws net.ws org.ws - com.ye edu.ye gov.ye mil.ye net.ye org.ye - ac.za alt.za bourse.za city.za co.za edu.za gov.za law.za mil.za net.za ngo.za nom.za org.za school.za tm.za web.za - ac.zm co.zm 
com.zm edu.zm gov.zm org.zm sch.zm - ac.zw co.zw gov.zw org.zw - - /) { - $TWO_LEVEL_DOMAINS{$_} = 1; -} - -# This is required because the .us domain is nuts. See $THREE_LEVEL_DOMAINS -# below. -# -# This list is moved to transitioned to Mail::SpamAssassin::RegistryBoundaries - -foreach (qw/ - ak al ar az ca co ct dc de fl ga gu hi ia id il in ks ky la ma md me mi - mn mo ms mt nc nd ne nh nj nm nv ny oh ok or pa pr ri sc sd tn tx ut va vi - vt wa wi wv wy - /) { - $US_STATES{$_} = 1; -} - -## -## DO NOT UPDATE THIS DEPRECATED LIST -## Everything is now maintained in sa-update 20_aux_tlds.cf -## -foreach (qw/ - demon.co.uk esc.edu.ar lkd.co.im plc.co.im - /) { - $THREE_LEVEL_DOMAINS{$_} = 1; -} - -########################################################################### - -=head1 METHODS - -=over 4 - -=item ($hostname, $domain) = split_domain ($fqdn) - -Cut a fully-qualified hostname into the hostname part and the domain -part, splitting at the DNS registry boundary. - -Examples: - - "www.foo.com" => ( "www", "foo.com" ) - "www.foo.co.uk" => ( "www", "foo.co.uk" ) - -This function has been moved !!! See Mail::SpamAssassin::RegistryBoundaries !!! - -This is left as transition fallback for third party plugins. - -It will be removed in the future. 
- -=cut - -sub split_domain { - my $domain = lc shift; - my $hostname = ''; - - if (defined $domain && $domain ne '') { - # www..spamassassin.org -> www.spamassassin.org - $domain =~ tr/././s; - - # leading/trailing dots - $domain =~ s/^\.+//; - $domain =~ s/\.+$//; - - # Split scalar domain into components - my @domparts = split(/\./, $domain); - my @hostname; - - while (@domparts > 1) { # go until we find the TLD - if (@domparts == 4) { - if ($domparts[3] eq 'us' && - (($domparts[0] eq 'pvt' && $domparts[1] eq 'k12') || - ($domparts[0] =~ /^c[io]$/))) - { - # http://www.neustar.us/policies/docs/rfc_1480.txt - # "Fire-Dept.CI.Los-Angeles.CA.US" - # ".PVT.K12..US" - last if ($US_STATES{$domparts[2]}); - } - } - elsif (@domparts == 3) { - # http://www.neustar.us/policies/docs/rfc_1480.txt - # demon.co.uk - # esc.edu.ar - # [^\.]+\.${US_STATES}\.us - if ($domparts[2] eq 'us') { - last if ($US_STATES{$domparts[1]}); - } - else { - my $temp = join(".", @domparts); - last if ($THREE_LEVEL_DOMAINS{$temp}); - } - } - elsif (@domparts == 2) { - # co.uk, etc. - my $temp = join(".", @domparts); - last if ($TWO_LEVEL_DOMAINS{$temp}); - } - push(@hostname, shift @domparts); - } - - # Look for a sub-delegated TLD - # use @domparts to skip trying to match on TLDs that can't possibly - # match, but keep in mind that the hostname can be blank, so 4TLD needs 4, - # 3TLD needs 3, 2TLD needs 2 ... - # - unshift @domparts, pop @hostname if @hostname; - $domain = join(".", @domparts); - $hostname = join(".", @hostname); - } - - ($hostname, $domain); -} - -########################################################################### - -=item $domain = trim_domain($fqdn) - -Cut a fully-qualified hostname into the hostname part and the domain -part, returning just the domain. - -Examples: - - "www.foo.com" => "foo.com" - "www.foo.co.uk" => "foo.co.uk" - -This function has been moved !!! See Mail::SpamAssassin::RegistryBoundaries !!! 
- -This is left as transition fallback for third party plugins. - -It will be removed in the future. - -=cut - -sub trim_domain { - my ($domain) = @_; - my ($host, $dom) = split_domain($domain); - return $dom; -} - -########################################################################### - -=item $ok = is_domain_valid($dom) - -Return C<1> if the domain is valid, C otherwise. A valid domain -(a) does not contain whitespace, (b) contains at least one dot, and (c) -uses a valid TLD or ccTLD. - -This function has been moved !!! See Mail::SpamAssassin::RegistryBoundaries !!! - -This is left as transition fallback for third party plugins. - -It will be removed in the future. - -=back - -=cut - -sub is_domain_valid { - my ($dom) = @_; - - # domains don't have whitespace - return 0 if ($dom =~ /\s/); - - # ensure it ends in a known-valid TLD, and has at least 1 dot - return 0 unless ($dom =~ /\.([^.]+)$/); - return 0 unless ($VALID_TLDS{$1}); - - return 1; # nah, it's ok. -} - -1; diff --git lib/Mail/SpamAssassin/Util/ScopedTimer.pm lib/Mail/SpamAssassin/Util/ScopedTimer.pm index 9bdcddf79..0fe757565 100644 --- lib/Mail/SpamAssassin/Util/ScopedTimer.pm +++ lib/Mail/SpamAssassin/Util/ScopedTimer.pm @@ -21,7 +21,7 @@ package Mail::SpamAssassin::Util::ScopedTimer; use strict; use warnings; -use bytes; +# use bytes; use re 'taint'; our @ISA = qw(); diff --git masses/contrib/automasscheck-minimal/automasscheck-minimal.sh masses/contrib/automasscheck-minimal/automasscheck-minimal.sh index 01a459417..3374cfe25 100755 --- masses/contrib/automasscheck-minimal/automasscheck-minimal.sh +++ masses/contrib/automasscheck-minimal/automasscheck-minimal.sh @@ -46,7 +46,7 @@ if [ "$1" == "--nightly" ]; then echo "Syncing $TYPE" rsync -qrz --delete rsync://rsync.spamassassin.org/tagged_builds/$TYPE/ $WORKDIR/$TYPE/ retval=$? 
- JOBS=8 + JOBS=${JOBS} LOGTYPE= RSYNCMOD=corpus elif date +%w |grep -q ^6; then @@ -55,7 +55,7 @@ if [ "$1" == "--nightly" ]; then echo "Syncing $TYPE" rsync -qrz --delete rsync://rsync.spamassassin.org/tagged_builds/$TYPE/ $WORKDIR/$TYPE/ retval=$? - JOBS=8 + JOBS=${JOBS} NET=--net LOGTYPE=net- RSYNCMOD=corpus @@ -65,7 +65,7 @@ if [ "$1" == "--nightly" ]; then echo "Syncing $TYPE" rsync -qrz --delete rsync://rsync.spamassassin.org/tagged_builds/$TYPE/ $WORKDIR/$TYPE/ retval=$? - JOBS=8 + JOBS=${JOBS} LOGTYPE= RSYNCMOD=corpus fi @@ -92,7 +92,8 @@ run_masscheck() { "$@" LOGLIST="$LOGLIST ham-${LOGNAME} spam-${LOGNAME}" set +x - + ln -s ham-${LOGNAME} ham.log + ln -s spam-${LOGNAME} spam.log } upload_results() { @@ -131,6 +132,7 @@ else fi # Run +JOBS=${JOBS:=8} setup_checktype $@ mkdir -p $WORKDIR/$TYPE cd $WORKDIR/$TYPE diff --git masses/rule-dev/seek-phrases-in-log masses/rule-dev/seek-phrases-in-log index 178be4444..b20174cd8 100755 --- masses/rule-dev/seek-phrases-in-log +++ masses/rule-dev/seek-phrases-in-log @@ -27,14 +27,14 @@ seek-phrases-in-log - extract good-looking rules from a text-dump mc log sub usage { die " -usage: seek-phrases-in-log [--reqhitrate n] [--reqpatlength n] +usage: seek-phrases-in-log [--reqhitrate n] [--reqpatlength n] [ --maxreqpatlength n] [--rules] [--ruletype 'type'] [--ruleprefix FOO] [--maxtextread n] --ham hamlog --spam spamlog --reqhitrate: percentage hit-rate against spam required (default: 0.5) (multiple values can be specified, separated by spaces) --reqpatlength: required pattern length, in characters (default: 0) ---maxreqpatlength: maximum pattern length, in characters (default: 2048) +--maxreqpatlength: maximum pattern length, in characters (default: 1024) --maxtextread: bytes of message text examined (default: 32768) --rules: generate SpamAssassin rule output (default: 0) --ruleprefix: specify prefix string for rules (default: 'SEEK_') @@ -59,6 +59,7 @@ sub logmsg; my %opt = (); $opt{reqhitrate} = 0.5; 
$opt{reqpatlength} = 0; +$opt{maxreqpatlength} = 1024; $opt{maxtextread} = 32768; $opt{rules} = 0; $opt{ruleprefix} = 'SEEK_'; @@ -72,6 +73,7 @@ GetOptions( "ruleprefix=s" => \$opt{ruleprefix}, "reqhitrate=s" => \$opt{reqhitrate}, "reqpatlength=s" => \$opt{reqpatlength}, + "maxreqpatlength=s" => \$opt{maxreqpatlength}, "ruletype=s" => \$opt{ruletype}, "maxtextread=s" => \$opt{maxtextread}, "phase2=s" => \$opt{phase2}, @@ -152,7 +154,7 @@ sub proc_text_spam { $text = substr $text, 0, $opt{maxtextread}; # chop! } - $text =~ s/ +/ /gs; # single spaces, please + $text =~ s/ +/ /gs; # single spaces, please # we only need to save spam samples in memory, ignore ham samples push @text_string, $text; @@ -302,7 +304,7 @@ sub filter_into_message_subsets { } logmsg "message subsets found: ".(scalar - keys %{$asmstate->{all_patterns_for_set}}); + keys %{$asmstate->{all_patterns_for_set}}); $asmstate->{ngram_count} = \%ngram_count; $asmstate->{msg_subset_hit} = \%msg_subset_hit; @@ -365,8 +367,8 @@ sub assemble_regexps { my $count = 0; my $count_out = 0; foreach my $id (sort { - $asmstate->{ngram_count}->{$b} <=> $asmstate->{ngram_count}->{$a} - } keys %{$asmstate->{ngram_count}}) + $asmstate->{ngram_count}->{$b} <=> $asmstate->{ngram_count}->{$a} + } keys %{$asmstate->{ngram_count}}) { my $set = $asmstate->{msg_subset_hit}->{$id}; next if $done_set{$set}; $done_set{$set}++; @@ -393,12 +395,12 @@ sub assemble_regexps { foreach my $pat (@pats) { my $subsumed = 0; foreach my $done (@done_pats, @pats_new) { - # pattern == existing pattern, or existing pattern is contained by - # pattern, or pattern is contained in existing pattern + # pattern == existing pattern, or existing pattern is contained by + # pattern, or pattern is contained in existing pattern if ($pat eq $done || $pat =~ /\Q${done}\E/ || $done =~ /\Q${pat}\E/) - { $subsumed=1; last; } - # or one pattern contains the other (but interpreted as a regexp!) 
- # this deals with /foo.{0,10} bar/ vs /foo ish bar/ + { $subsumed=1; last; } + # or one pattern contains the other (but interpreted as a regexp!) + # this deals with /foo.{0,10} bar/ vs /foo ish bar/ if ($pat =~ /$done/) { $subsumed=1; last; } if ($done =~ /$pat/) { $subsumed=1; last; } } @@ -436,15 +438,15 @@ sub assemble_regexps { foreach my $pat (sort @pats) { my $name = generate_rule_name($pat); - if ($opt{ruletype} eq 'header') { - # deal with header-specific munging. - # "\[\\n\]" is the result of "[\n]", at this stage - $pat =~ s/\Q\[\\n\]\E/\\n/gs; - $pat =~ s/\Q\[\\t\]\E/\\t/gs; - } + if ($opt{ruletype} eq 'header') { + # deal with header-specific munging. + # "\[\\n\]" is the result of "[\n]", at this stage + $pat =~ s/\Q\[\\n\]\E/\\n/gs; + $pat =~ s/\Q\[\\t\]\E/\\t/gs; + } print "$opt{ruletype} $opt{ruleprefix}${name} /$pat/\n"; - $count_out++; + $count_out++; } } else { @@ -672,8 +674,8 @@ sub expand_with_dots { sub ensure_reqpatlength { my @ret = @_; if ($opt{reqpatlength}) { - @ret = grep { (length($_) >= $opt{reqpatlength}) && (length($_) < $opt{maxreqpatlength}) } @ret; - return () unless @ret; + @ret = grep { (length($_) >= $opt{reqpatlength}) && (length($_) < $opt{maxreqpatlength}) } @ret; + return () unless @ret; } return @ret; } diff --git rules-extras/10_uridnsbl_skip_financial.cf rules-extras/10_uridnsbl_skip_financial.cf new file mode 100644 index 000000000..84dbaa6ae --- /dev/null +++ rules-extras/10_uridnsbl_skip_financial.cf @@ -0,0 +1,625 @@ + +# +# Last update: 2016-08-29-axb +# Phished financial domains +ifplugin Mail::SpamAssassin::Plugin::URIDNSBL + +uridnsbl_skip_domain 1stnationalbank.com +uridnsbl_skip_domain 365online.com +uridnsbl_skip_domain 53.com +uridnsbl_skip_domain abl.com.pk +uridnsbl_skip_domain abnamro.nl +uridnsbl_skip_domain accessbankplc.com +uridnsbl_skip_domain adib.ae +uridnsbl_skip_domain aib.ie +uridnsbl_skip_domain aibgb.co.uk +uridnsbl_skip_domain airdriesavingsbank.com +uridnsbl_skip_domain aldermore.co.uk 
+uridnsbl_skip_domain alliancebank.com.my +uridnsbl_skip_domain alliancefg.com +uridnsbl_skip_domain alliantcreditunion.com +uridnsbl_skip_domain alliantcreditunion.org +uridnsbl_skip_domain allianz.de +uridnsbl_skip_domain allybank.com +uridnsbl_skip_domain alterna.ca +uridnsbl_skip_domain americanexpress.ch +uridnsbl_skip_domain americanexpress.com +uridnsbl_skip_domain anadolubank.nl +uridnsbl_skip_domain anz.co.nz +uridnsbl_skip_domain anz.com +uridnsbl_skip_domain anz.com.au +uridnsbl_skip_domain arbuthnotlatham.co.uk +uridnsbl_skip_domain asb.co.nz +uridnsbl_skip_domain authorize.net +uridnsbl_skip_domain axisbank.co.in +uridnsbl_skip_domain axisbank.com +uridnsbl_skip_domain b2bbank.com +uridnsbl_skip_domain baaderbank.de +uridnsbl_skip_domain baloise.ch +uridnsbl_skip_domain baml.com +uridnsbl_skip_domain banamex.com +uridnsbl_skip_domain bancanetbsc.do +uridnsbl_skip_domain bancanetsantacruz.com.do +uridnsbl_skip_domain bancapulia.it +uridnsbl_skip_domain bancarios.com +uridnsbl_skip_domain bancastato.ch +uridnsbl_skip_domain bancatransilvania.ro +uridnsbl_skip_domain banco.bradesco +uridnsbl_skip_domain bancobase.com +uridnsbl_skip_domain bancobic.ao +uridnsbl_skip_domain bancobic.pt +uridnsbl_skip_domain bancobpi.pt +uridnsbl_skip_domain bancobrasil.com.br +uridnsbl_skip_domain bancochile.cl +uridnsbl_skip_domain bancochile.com +uridnsbl_skip_domain bancoestado.cl +uridnsbl_skip_domain bancofalabella.cl +uridnsbl_skip_domain bancofalabella.com.co +uridnsbl_skip_domain bancofalabella.pe +uridnsbl_skip_domain bancomer.com +uridnsbl_skip_domain bancopopolare.it +uridnsbl_skip_domain bancoposta.it +uridnsbl_skip_domain bancopostaclick.it +uridnsbl_skip_domain bancosantander.es +uridnsbl_skip_domain bancovotorantimcartoes.com.br +uridnsbl_skip_domain bank-of-ireland.co.uk +uridnsbl_skip_domain bank.barclays.co.uk +uridnsbl_skip_domain bank24.ru +uridnsbl_skip_domain bankalhabib.com +uridnsbl_skip_domain bankaustria.at +uridnsbl_skip_domain 
bankbgzbnpparibas.pl +uridnsbl_skip_domain bankcardservices.co.uk +uridnsbl_skip_domain bankcomm.com +uridnsbl_skip_domain bankcoop.ch +uridnsbl_skip_domain bankia.com +uridnsbl_skip_domain bankia.es +uridnsbl_skip_domain bankiabancapersonal.es +uridnsbl_skip_domain bankinter.com +uridnsbl_skip_domain bankinter.es +uridnsbl_skip_domain bankmutual.com +uridnsbl_skip_domain bankofamerica.com +uridnsbl_skip_domain bankofcanada.ca +uridnsbl_skip_domain bankofchina.com +uridnsbl_skip_domain bankofcyprus.com +uridnsbl_skip_domain bankofindia.co.nz +uridnsbl_skip_domain bankofireland.com +uridnsbl_skip_domain bankofirelanduk.com +uridnsbl_skip_domain bankofoklahoma.com +uridnsbl_skip_domain bankofscotland.co.uk +uridnsbl_skip_domain bankofsingapore.com +uridnsbl_skip_domain banksinarmas.com +uridnsbl_skip_domain bankvonroll.ch +uridnsbl_skip_domain bankwest.com.au +uridnsbl_skip_domain banque-casino.fr +uridnsbl_skip_domain banquepopulaire.fr +uridnsbl_skip_domain banquescotia.com +uridnsbl_skip_domain barclaycard.co.uk +uridnsbl_skip_domain barclaycard.de +uridnsbl_skip_domain barclaycard.es +uridnsbl_skip_domain barclays.co.uk +uridnsbl_skip_domain barclays.com +uridnsbl_skip_domain barclays.sc +uridnsbl_skip_domain barclayspartnerfinance.com +uridnsbl_skip_domain barodanzltd.co.nz +uridnsbl_skip_domain basler.ch +uridnsbl_skip_domain bba.org.uk +uridnsbl_skip_domain bbandt.com +uridnsbl_skip_domain bci.cl +uridnsbl_skip_domain bcp.com.pe +uridnsbl_skip_domain bcv.ch +uridnsbl_skip_domain bcvs.ch +uridnsbl_skip_domain bekb.ch +uridnsbl_skip_domain bellevue.ch +uridnsbl_skip_domain bendigobank.com.au +uridnsbl_skip_domain berliner-bank.de +uridnsbl_skip_domain berliner-sparkasse.de +uridnsbl_skip_domain bfanet.ao +uridnsbl_skip_domain bgfi.com +uridnsbl_skip_domain bgfionline.com +uridnsbl_skip_domain bgzbnpparibas.pl +uridnsbl_skip_domain billmelater.com +uridnsbl_skip_domain bk.rw +uridnsbl_skip_domain bkb.ch +uridnsbl_skip_domain bks.at +uridnsbl_skip_domain blkb.ch 
+uridnsbl_skip_domain bmo.com +uridnsbl_skip_domain bmocm.com +uridnsbl_skip_domain bmogam.com +uridnsbl_skip_domain bmoharris.com +uridnsbl_skip_domain bmoharrisprivatebankingonline.com +uridnsbl_skip_domain bmoinvestorline.com +uridnsbl_skip_domain bmonesbittburns.com +uridnsbl_skip_domain bnl.it +uridnsbl_skip_domain bnpparibas.com +uridnsbl_skip_domain bnpparibas.fr +uridnsbl_skip_domain bnpparibasfortis.be +uridnsbl_skip_domain boc.cnnz +uridnsbl_skip_domain bonuscard.ch +uridnsbl_skip_domain bpe-gruposantander.com +uridnsbl_skip_domain bpi.pt +uridnsbl_skip_domain bpostbank.be +uridnsbl_skip_domain bradescardonline.com.br +uridnsbl_skip_domain bradesco.com.br +uridnsbl_skip_domain bradescoseguranca.com.br +uridnsbl_skip_domain bridgewaterbank.ca +uridnsbl_skip_domain bsibank.com +uridnsbl_skip_domain bt-trade.ro +uridnsbl_skip_domain btrl.ro +uridnsbl_skip_domain businessonline-boi.com +uridnsbl_skip_domain bzbank.ch +uridnsbl_skip_domain ca-cib.com +uridnsbl_skip_domain ca-egypt.com +uridnsbl_skip_domain ca-suisse.com +uridnsbl_skip_domain cafbank.org +uridnsbl_skip_domain cafonline.org +uridnsbl_skip_domain caisse-epargne.com +uridnsbl_skip_domain caisse-epargne.fr +uridnsbl_skip_domain caixa.gov.br +uridnsbl_skip_domain caixabank.com +uridnsbl_skip_domain cajasur.es +uridnsbl_skip_domain camsonline.com +uridnsbl_skip_domain canadiandirect.com +uridnsbl_skip_domain capitalone.com +uridnsbl_skip_domain capitalone360.com +uridnsbl_skip_domain capitaloneonline.co.uk +uridnsbl_skip_domain capitecbank.co.za +uridnsbl_skip_domain cariparma.it +uridnsbl_skip_domain carrefour-banque.fr +uridnsbl_skip_domain cartabcc.it +uridnsbl_skip_domain cartabccpos.it +uridnsbl_skip_domain cartasi.it +uridnsbl_skip_domain catalunyacaixa.com +uridnsbl_skip_domain cbg.gm +uridnsbl_skip_domain cbonline.co.uk +uridnsbl_skip_domain cembra.ch +uridnsbl_skip_domain cenbank.org +uridnsbl_skip_domain centralbank.ae +uridnsbl_skip_domain charitybank.org +uridnsbl_skip_domain chase.com 
+uridnsbl_skip_domain chebanca.it +uridnsbl_skip_domain chinatrust.com.tw +uridnsbl_skip_domain cial.ch +uridnsbl_skip_domain cibc.com +uridnsbl_skip_domain cic.ch +uridnsbl_skip_domain cimbclicks.com.my +uridnsbl_skip_domain citi.co.nz +uridnsbl_skip_domain citi.com +uridnsbl_skip_domain citi.eu +uridnsbl_skip_domain citibank.ae +uridnsbl_skip_domain citibank.co.in +uridnsbl_skip_domain citibank.co.uk +uridnsbl_skip_domain citibank.com +uridnsbl_skip_domain citibankonline.com +uridnsbl_skip_domain citibusiness.com +uridnsbl_skip_domain citicards.com +uridnsbl_skip_domain citigroup.com +uridnsbl_skip_domain citizensbank.ca +uridnsbl_skip_domain citizensbank.com +uridnsbl_skip_domain citizensbankonline.com +uridnsbl_skip_domain civibank.com +uridnsbl_skip_domain civibank.it +uridnsbl_skip_domain closebrothers.co.uk +uridnsbl_skip_domain closebrothers.com +uridnsbl_skip_domain clubsc.ch +uridnsbl_skip_domain co-operativebank.co.uk +uridnsbl_skip_domain colpatria.com +uridnsbl_skip_domain colpatria.com.co +uridnsbl_skip_domain commbank.com +uridnsbl_skip_domain commbank.com.au +uridnsbl_skip_domain commerzbank.com +uridnsbl_skip_domain commerzbank.de +uridnsbl_skip_domain coopbank.dk +uridnsbl_skip_domain corner.ch +uridnsbl_skip_domain cornerbanca.ch +uridnsbl_skip_domain cornercard.ch +uridnsbl_skip_domain cornercard.com +uridnsbl_skip_domain cosycard.ch +uridnsbl_skip_domain coutts.com +uridnsbl_skip_domain credit-agricole.com +uridnsbl_skip_domain credit-agricole.fr +uridnsbl_skip_domain credit-suisse.com +uridnsbl_skip_domain creditagricole.rs +uridnsbl_skip_domain cs.com +uridnsbl_skip_domain css.ch +uridnsbl_skip_domain ctbcbank.com +uridnsbl_skip_domain ctfs.com +uridnsbl_skip_domain cwbank.com +uridnsbl_skip_domain cwbankgroup.com +uridnsbl_skip_domain cwt.ca +uridnsbl_skip_domain cybg.com +uridnsbl_skip_domain danskebank.co.uk +uridnsbl_skip_domain danskebank.com +uridnsbl_skip_domain danskebank.de +uridnsbl_skip_domain danskebank.dk +uridnsbl_skip_domain 
danskebank.ee +uridnsbl_skip_domain danskebank.fi +uridnsbl_skip_domain danskebank.ie +uridnsbl_skip_domain danskebank.no +uridnsbl_skip_domain danskebankas.lt +uridnsbl_skip_domain datatrans.biz +uridnsbl_skip_domain datatrans.ch +uridnsbl_skip_domain db.com +uridnsbl_skip_domain dbs.com +uridnsbl_skip_domain demirbank.kg +uridnsbl_skip_domain denizbank.com +uridnsbl_skip_domain desjardins.ca +uridnsbl_skip_domain desjardins.com +uridnsbl_skip_domain deutsche-bank.de +uridnsbl_skip_domain deutschebank.be +uridnsbl_skip_domain deutschebank.co.nz +uridnsbl_skip_domain deutschebank.de +uridnsbl_skip_domain diamondbank.com +uridnsbl_skip_domain dibpak.com +uridnsbl_skip_domain discover.com +uridnsbl_skip_domain discovercard.com +uridnsbl_skip_domain discovery.co.za +uridnsbl_skip_domain dnbnord.lt +uridnsbl_skip_domain dresdner-bank.de +uridnsbl_skip_domain dsbbank.sr +uridnsbl_skip_domain duncanlawrie.com +uridnsbl_skip_domain e-gulfbank.com +uridnsbl_skip_domain easybank.at +uridnsbl_skip_domain ecobank.com +uridnsbl_skip_domain edwardjones.com +uridnsbl_skip_domain esunbank.com.tw +uridnsbl_skip_domain fednetbank.com +uridnsbl_skip_domain fidelity.com +uridnsbl_skip_domain fidor.de +uridnsbl_skip_domain finance.com +uridnsbl_skip_domain finansbank.com.tr +uridnsbl_skip_domain finasta.lt +uridnsbl_skip_domain fineco.it +uridnsbl_skip_domain firstbankcard.com +uridnsbl_skip_domain firstmerit.com +uridnsbl_skip_domain firstnational.com +uridnsbl_skip_domain firstnationalmerchantsolutions.com +uridnsbl_skip_domain firsttrustbank.co.uk +uridnsbl_skip_domain fnb-online.com +uridnsbl_skip_domain fnb.co.za +uridnsbl_skip_domain fnbc.ca +uridnsbl_skip_domain friuladria.it +uridnsbl_skip_domain garanti.com.tr +uridnsbl_skip_domain garantibank.eu +uridnsbl_skip_domain garantibank.nl +uridnsbl_skip_domain gazprombank.ch +uridnsbl_skip_domain gazprombank.ru +uridnsbl_skip_domain generali.es +uridnsbl_skip_domain genevoise.ch +uridnsbl_skip_domain gkb.ch +uridnsbl_skip_domain 
granitbank.hu +uridnsbl_skip_domain gtbank.com +uridnsbl_skip_domain halifax.co.uk +uridnsbl_skip_domain handelsbanken.se +uridnsbl_skip_domain harrodsbank.co.uk +uridnsbl_skip_domain hbl.com +uridnsbl_skip_domain hblibank.com +uridnsbl_skip_domain hblibank.com.pk +uridnsbl_skip_domain hdfcbank.com +uridnsbl_skip_domain heartland.co.nz +uridnsbl_skip_domain hellenicbank.com +uridnsbl_skip_domain hkbea.com +uridnsbl_skip_domain hlb.com.kh +uridnsbl_skip_domain hlb.com.my +uridnsbl_skip_domain hoaresbank.co.uk +uridnsbl_skip_domain home.barclays +uridnsbl_skip_domain hongleongconnect.com.kh +uridnsbl_skip_domain hongleongconnect.com.vn +uridnsbl_skip_domain hongleongconnect.my +uridnsbl_skip_domain hsbc.co.nz +uridnsbl_skip_domain hsbc.co.uk +uridnsbl_skip_domain hsbc.com +uridnsbl_skip_domain hsbc.com.ar +uridnsbl_skip_domain hsbc.com.hk +uridnsbl_skip_domain hypovereinsbank.co.uk +uridnsbl_skip_domain hypovereinsbank.de +uridnsbl_skip_domain icbcnz.com +uridnsbl_skip_domain icicibank.co.in +uridnsbl_skip_domain icicibank.com +uridnsbl_skip_domain icicibankprivatebanking.com +uridnsbl_skip_domain icorner.ch +uridnsbl_skip_domain icscards.de +uridnsbl_skip_domain icscards.nl +uridnsbl_skip_domain ing-diba.de +uridnsbl_skip_domain ing.be +uridnsbl_skip_domain ing.com +uridnsbl_skip_domain ing.lu +uridnsbl_skip_domain ing.nl +uridnsbl_skip_domain ingdirect.ca +uridnsbl_skip_domain ingdirect.fr +uridnsbl_skip_domain ingvysyabank.com +uridnsbl_skip_domain interac.ca +uridnsbl_skip_domain iobnet.co.in +uridnsbl_skip_domain isbank.com.tr +uridnsbl_skip_domain isbank.de +uridnsbl_skip_domain isbank.ge +uridnsbl_skip_domain isbank.iq +uridnsbl_skip_domain isbankkosova.com +uridnsbl_skip_domain itau.com.br +uridnsbl_skip_domain jpmchase.com +uridnsbl_skip_domain jpmorgan.com +uridnsbl_skip_domain jsafrasarasin.com +uridnsbl_skip_domain julianhodgebank.com +uridnsbl_skip_domain juliusbaer.com +uridnsbl_skip_domain jyskebank.dk +uridnsbl_skip_domain kantonalbank.ch 
+uridnsbl_skip_domain key.com +uridnsbl_skip_domain kiwibank.co.nz +uridnsbl_skip_domain kotak.com +uridnsbl_skip_domain kredytbank.pl +uridnsbl_skip_domain kreissparkasse-schwalm-eder.de +uridnsbl_skip_domain ksklb.de +uridnsbl_skip_domain kutxabank.es +uridnsbl_skip_domain laboralkutxa.com +uridnsbl_skip_domain lacaixa.cat +uridnsbl_skip_domain lacaixa.es +uridnsbl_skip_domain laurentianbank.ca +uridnsbl_skip_domain lbb.de +uridnsbl_skip_domain lcl.com +uridnsbl_skip_domain lcl.fr +uridnsbl_skip_domain lloydsbank.com +uridnsbl_skip_domain lloydsbankcommercial.com +uridnsbl_skip_domain lloydsbankinggroup.com +uridnsbl_skip_domain lloydstsb.ch +uridnsbl_skip_domain lloydstsb.co.uk +uridnsbl_skip_domain lombardodier.com +uridnsbl_skip_domain loydsbank.com +uridnsbl_skip_domain maerki-baumann.ch +uridnsbl_skip_domain mandtbank.com +uridnsbl_skip_domain manulife.com +uridnsbl_skip_domain manulifebank.ca +uridnsbl_skip_domain manulifebankselect.ca +uridnsbl_skip_domain manulifeone.ca +uridnsbl_skip_domain mashreqbank.com +uridnsbl_skip_domain mastercard.com +uridnsbl_skip_domain maybank2u.com +uridnsbl_skip_domain maybank2u.com.my +uridnsbl_skip_domain mdmbank.com +uridnsbl_skip_domain mechanicsbank.com +uridnsbl_skip_domain medbank.lt +uridnsbl_skip_domain metrobankdirect.com +uridnsbl_skip_domain metrobankonline.co.uk +uridnsbl_skip_domain migbank.com +uridnsbl_skip_domain migrosbank.ch +uridnsbl_skip_domain mizuhobank.co.jp +uridnsbl_skip_domain mmwarburg.lu +uridnsbl_skip_domain montepio.pt +uridnsbl_skip_domain morganstanley.com +uridnsbl_skip_domain mps.it +uridnsbl_skip_domain ms.com +uridnsbl_skip_domain mufg.jp +uridnsbl_skip_domain myonlineresourcecenter.com +uridnsbl_skip_domain myonlineservices.ch +uridnsbl_skip_domain nab.com.au +uridnsbl_skip_domain nationalesuisse.ch +uridnsbl_skip_domain nationwide-communications.co.uk +uridnsbl_skip_domain nationwide-service.co.uk +uridnsbl_skip_domain nationwide.co.uk +uridnsbl_skip_domain natwest.com 
+uridnsbl_skip_domain navyfederal.org +uridnsbl_skip_domain nbc.ca +uridnsbl_skip_domain newyorkfed.org +uridnsbl_skip_domain nibl.com.np +uridnsbl_skip_domain nordea.fi +uridnsbl_skip_domain nordea.lt +uridnsbl_skip_domain nordfynsbank.dk +uridnsbl_skip_domain norisbank.de +uridnsbl_skip_domain notenstein.ch +uridnsbl_skip_domain nuvisionfederal.com +uridnsbl_skip_domain oceanbank.com +uridnsbl_skip_domain onlinesbi.com +uridnsbl_skip_domain orchardbank.com +uridnsbl_skip_domain ostsaechsische-sparkasse-dresden.de +uridnsbl_skip_domain paylife.at +uridnsbl_skip_domain paypal-brasil.com.br +uridnsbl_skip_domain paypal-communication.com +uridnsbl_skip_domain paypal-community.com +uridnsbl_skip_domain paypal-customerfeedback.com +uridnsbl_skip_domain paypal-deutschland.de +uridnsbl_skip_domain paypal-exchanges.com +uridnsbl_skip_domain paypal-marketing.co.uk +uridnsbl_skip_domain paypal-marketing.pl +uridnsbl_skip_domain paypal-notify.com +uridnsbl_skip_domain paypal-now.com +uridnsbl_skip_domain paypal-opwaarderen.nl +uridnsbl_skip_domain paypal-pages.com +uridnsbl_skip_domain paypal-search.com +uridnsbl_skip_domain paypal-shopping.co.uk +uridnsbl_skip_domain paypal-techsupport.com +uridnsbl_skip_domain paypal.be +uridnsbl_skip_domain paypal.ca +uridnsbl_skip_domain paypal.ch +uridnsbl_skip_domain paypal.co.il +uridnsbl_skip_domain paypal.co.uk +uridnsbl_skip_domain paypal.com +uridnsbl_skip_domain paypal.com.au +uridnsbl_skip_domain paypal.com.br +uridnsbl_skip_domain paypal.com.mx +uridnsbl_skip_domain paypal.com.pt +uridnsbl_skip_domain paypal.de +uridnsbl_skip_domain paypal.dk +uridnsbl_skip_domain paypal.es +uridnsbl_skip_domain paypal.fr +uridnsbl_skip_domain paypal.it +uridnsbl_skip_domain paypal.net +uridnsbl_skip_domain paypal.nl +uridnsbl_skip_domain paypal.no +uridnsbl_skip_domain paypal.pt +uridnsbl_skip_domain paypal.ru +uridnsbl_skip_domain paypal.se +uridnsbl_skip_domain paypalobjects.com +uridnsbl_skip_domain pbebank.com +uridnsbl_skip_domain 
pcfinancial.ca +uridnsbl_skip_domain permanenttsb.ie +uridnsbl_skip_domain pnc.com +uridnsbl_skip_domain popolarevicenza.it +uridnsbl_skip_domain postbank.de +uridnsbl_skip_domain postepay.it +uridnsbl_skip_domain postfinance.ch +uridnsbl_skip_domain postfinance.info +uridnsbl_skip_domain postfinancearena.ch +uridnsbl_skip_domain publicislamicbank.com.my +uridnsbl_skip_domain rabobank.co.nz +uridnsbl_skip_domain rabobank.com +uridnsbl_skip_domain rabobank.nl +uridnsbl_skip_domain rahnbodmer.ch +uridnsbl_skip_domain raiffeisen.ch +uridnsbl_skip_domain raiffeisen.hu +uridnsbl_skip_domain raiffeisen.li +uridnsbl_skip_domain raiffeisen.ru +uridnsbl_skip_domain raiffeisenbank.rs +uridnsbl_skip_domain raphaelsbank.com +uridnsbl_skip_domain rbc.com +uridnsbl_skip_domain rbcroyalbank.com +uridnsbl_skip_domain rbs.co.uk +uridnsbl_skip_domain rbssecure.co.uk +uridnsbl_skip_domain rbsworldpay.com +uridnsbl_skip_domain rcb.at +uridnsbl_skip_domain recordbank.be +uridnsbl_skip_domain regiobank.nl +uridnsbl_skip_domain regions.com +uridnsbl_skip_domain regionsnet.com +uridnsbl_skip_domain renasantbank.com +uridnsbl_skip_domain rhbgroup.com +uridnsbl_skip_domain rogersbank.com +uridnsbl_skip_domain rothschild.com +uridnsbl_skip_domain rothschildbank.com +uridnsbl_skip_domain royalbank.com +uridnsbl_skip_domain s.de +uridnsbl_skip_domain sagepay.co.uk +uridnsbl_skip_domain sagepay.com +uridnsbl_skip_domain sainsburysbank.co.uk +uridnsbl_skip_domain samba.com +uridnsbl_skip_domain santander.cl +uridnsbl_skip_domain santander.co.uk +uridnsbl_skip_domain santander.com +uridnsbl_skip_domain santander.com.br +uridnsbl_skip_domain santander.com.mx +uridnsbl_skip_domain santandercorretora.com.br +uridnsbl_skip_domain santanderesfera.com.br +uridnsbl_skip_domain santandersantiago.cl +uridnsbl_skip_domain sarasin.ch +uridnsbl_skip_domain sberbank.ch +uridnsbl_skip_domain sbs.net.nz +uridnsbl_skip_domain sc.com +uridnsbl_skip_domain schoellerbank.at +uridnsbl_skip_domain
scotiabank.ca +uridnsbl_skip_domain scotiabank.com +uridnsbl_skip_domain scotiamocatta.com +uridnsbl_skip_domain scotiaonline.com +uridnsbl_skip_domain securetrustbank.com +uridnsbl_skip_domain service-sparkasse.de +uridnsbl_skip_domain serviciobancomer.com +uridnsbl_skip_domain shawbrook.co.uk +uridnsbl_skip_domain shkb.ch +uridnsbl_skip_domain six-group.com +uridnsbl_skip_domain six-payment-services.com +uridnsbl_skip_domain skrill.com +uridnsbl_skip_domain sls-direkt.de +uridnsbl_skip_domain snb.ch +uridnsbl_skip_domain snsbank.nl +uridnsbl_skip_domain societegenerale.fr +uridnsbl_skip_domain sparda-a.de +uridnsbl_skip_domain sparda-b.de +uridnsbl_skip_domain sparda-bank-hamburg.de +uridnsbl_skip_domain sparda-bw.de +uridnsbl_skip_domain sparda-h.de +uridnsbl_skip_domain sparda-hessen.de +uridnsbl_skip_domain sparda-m.de +uridnsbl_skip_domain sparda-ms.de +uridnsbl_skip_domain sparda-n.de +uridnsbl_skip_domain sparda-ostbayern.de +uridnsbl_skip_domain sparda-sw.de +uridnsbl_skip_domain sparda-verband.de +uridnsbl_skip_domain sparda-west.de +uridnsbl_skip_domain sparkasse-bank-malta.com +uridnsbl_skip_domain sparkasse-bielefeld.de +uridnsbl_skip_domain sparkasse-bochum.de +uridnsbl_skip_domain sparkasse-gera-greiz.de +uridnsbl_skip_domain sparkasse-hamm.de +uridnsbl_skip_domain sparkasse-heidelberg.de +uridnsbl_skip_domain sparkasse-ingolstadt.de +uridnsbl_skip_domain sparkasse-mittelthueringen.de +uridnsbl_skip_domain sparkasse.at +uridnsbl_skip_domain sparkasse.ch +uridnsbl_skip_domain sparkasse.de +uridnsbl_skip_domain sparkasseblog.de +uridnsbl_skip_domain standardbank.co.za +uridnsbl_skip_domain standardbank.com +uridnsbl_skip_domain standardchartered.com.gh +uridnsbl_skip_domain standardchartered.com.my +uridnsbl_skip_domain suncorpbank.com.au +uridnsbl_skip_domain suntrust.com +uridnsbl_skip_domain swedbank.com +uridnsbl_skip_domain swedbank.ee +uridnsbl_skip_domain swedbank.lt +uridnsbl_skip_domain swedbank.lu +uridnsbl_skip_domain swedbank.se
+uridnsbl_skip_domain swisscanto.ch +uridnsbl_skip_domain swisscaution.ch +uridnsbl_skip_domain swissquote.ch +uridnsbl_skip_domain sydbank.dk +uridnsbl_skip_domain tangerine.ca +uridnsbl_skip_domain tcb-bank.com.tw +uridnsbl_skip_domain tdbank.com +uridnsbl_skip_domain tdcommercialbanking.com +uridnsbl_skip_domain tescobank.com +uridnsbl_skip_domain tsb.co.nz +uridnsbl_skip_domain tsb.co.uk +uridnsbl_skip_domain tsbbank.co.nz +uridnsbl_skip_domain ubibanca.com +uridnsbl_skip_domain ubs.com +uridnsbl_skip_domain ulsterbank.co.uk +uridnsbl_skip_domain ulsterbankanytimebanking.co.uk +uridnsbl_skip_domain unibanco.pt +uridnsbl_skip_domain unibancoconnect.pt +uridnsbl_skip_domain unicredit.eu +uridnsbl_skip_domain unicredit.it +uridnsbl_skip_domain unicreditbank.lt +uridnsbl_skip_domain unicreditgroup.eu +uridnsbl_skip_domain unionbank.com +uridnsbl_skip_domain unionbankcameroon.com +uridnsbl_skip_domain unity.co.uk +uridnsbl_skip_domain uob.com.sg +uridnsbl_skip_domain uobgroup.com +uridnsbl_skip_domain usbank.com +uridnsbl_skip_domain valianttrust.com +uridnsbl_skip_domain vaudoise.ch +uridnsbl_skip_domain venetobanca.it +uridnsbl_skip_domain venetobanka.al +uridnsbl_skip_domain versabank.com +uridnsbl_skip_domain virginmoney.com +uridnsbl_skip_domain visa.com.ar +uridnsbl_skip_domain visa.com.br +uridnsbl_skip_domain visaeurope.ch +uridnsbl_skip_domain visaeurope.com +uridnsbl_skip_domain viseca.ch +uridnsbl_skip_domain volksbank.de +uridnsbl_skip_domain volkswagenbank.de +uridnsbl_skip_domain vpbank.com +uridnsbl_skip_domain vr.de +uridnsbl_skip_domain vwbank.de +uridnsbl_skip_domain wachovia.com +uridnsbl_skip_domain weatherbys.co.uk +uridnsbl_skip_domain wegelin.ch +uridnsbl_skip_domain wellsfargo.com +uridnsbl_skip_domain wellsfargoemail.com +uridnsbl_skip_domain westernunion.ca +uridnsbl_skip_domain westernunion.com +uridnsbl_skip_domain westernunion.fr +uridnsbl_skip_domain westernunion.se +uridnsbl_skip_domain westpac.co.nz +uridnsbl_skip_domain 
westpac.com.au +uridnsbl_skip_domain westpac.com.nz +uridnsbl_skip_domain wir.ch +uridnsbl_skip_domain worldbank.org +uridnsbl_skip_domain worldpay.com +uridnsbl_skip_domain wvb.de +uridnsbl_skip_domain yacht.nl +uridnsbl_skip_domain ybonline.co.uk +uridnsbl_skip_domain yorkshirebank.co.uk +uridnsbl_skip_domain yourbankcard.com +uridnsbl_skip_domain zagbank.ca +uridnsbl_skip_domain zenithbank.com +uridnsbl_skip_domain zkb.ch +uridnsbl_skip_domain zugerkb.ch +endif # Mail::SpamAssassin::Plugin::URIDNSBL diff --git rules-extras/README.txt rules-extras/README.txt new file mode 100644 index 000000000..030088f02 --- /dev/null +++ rules-extras/README.txt @@ -0,0 +1,3 @@ +Rules in this directory are NOT processed by masschecks or sa-update. +Use at your own risk. + diff --git rules/20_aux_tlds.cf rules/20_aux_tlds.cf index 31dc74f08..c15a8bbd0 100644 --- rules/20_aux_tlds.cf +++ rules/20_aux_tlds.cf @@ -43,7 +43,7 @@ endif # # Basic list can be obtained with the following command: -# wget http://data.iana.org/TLD/tlds-alpha-by-domain.txt -O - | tail -n+2 | perl -ne 'print lc' +# wget http://data.iana.org/TLD/tlds-alpha-by-domain.txt -q -O - | tail -n+2 | perl -ne 'print lc' # Current list may include more or less.. TODO easier maintenance?
# util_rb_tld only accepts alpha (a-z) input before RegistryBoundaries was @@ -51,90 +51,145 @@ endif # this block # # For an up to date list of IDN TLDs that can be pasted into this block, run this command: -# wget http://data.iana.org/TLD/tlds-alpha-by-domain.txt -O - | tail -n+2 | grep -i 'xn--' | tr '\n' ' ' | fold -w 80 -s | perl -e 's/^/util_rb_tld / && print lc while <>' && echo +# wget http://data.iana.org/TLD/tlds-alpha-by-domain.txt -q -O - | grep -i '^xn--' | tr '\n' ' ' | fold -w 80 -s | perl -pe 'chomp; s/.*/util_rb_tld \L$_\n/' +# Since version 4.0 the util_rb_tld also accepts Unicode IDN labels (encoded as UTF-8), e.g.: +# wget http://data.iana.org/TLD/tlds-alpha-by-domain.txt -q -O - | grep -i '^xn--' | idn -u | tr '\n' ' ' | fold -w 80 -s | perl -pe 'chomp; s/.*/util_rb_tld \L$_\n/' if (can(Mail::SpamAssassin::Conf::feature_registryboundaries)) -util_rb_tld xn--1qqw23a xn--30rr7y xn--3bst00m xn--3ds443g xn--3e0b707e xn--45brj9c -util_rb_tld xn--45q11c xn--4gbrim xn--55qw42g xn--55qx5d xn--6frz82g xn--6qq986b3xl -util_rb_tld xn--80adxhks xn--80ao21a xn--80asehdb xn--80aswg xn--90a3ac xn--90ais -util_rb_tld xn--9et52u xn--b4w605ferd xn--c1avg xn--cg4bki xn--clchc0ea0b2g2a9gcd -util_rb_tld xn--czr694b xn--czrs0t xn--czru2d xn--d1acj3b xn--d1alf xn--fiq228c5hs -util_rb_tld xn--fiq64b xn--fiqs8s xn--fiqz9s xn--flw351e xn--fpcrj9c3d xn--fzc2c9e2c -util_rb_tld xn--gecrj9c xn--h2brj9c xn--hxt814e xn--i1b6b1a6a2e xn--io0a7i xn--j1amh -util_rb_tld xn--j6w193g xn--kprw13d xn--kpry57d xn--kput3i xn--l1acc xn--lgbbat1ad8j -util_rb_tld xn--mgb9awbf xn--mgba3a4f16a xn--mgbaam7a8h xn--mgbab2bd xn--mgbayh7gpa -util_rb_tld xn--mgbbh1a71e xn--mgbc0a9azcg xn--mgberp4a5d4ar xn--mgbx4cd0ab xn--mxtq1m -util_rb_tld xn--ngbc5azd xn--node xn--nqv7f xn--nqv7fs00ema xn--nyqy26a xn--o3cw4h -util_rb_tld xn--ogbpf8fl xn--p1acf xn--p1ai xn--pgbs0dh xn--q9jyb4c xn--qcka1pmc -util_rb_tld xn--rhqv96g xn--s9brj9c xn--ses554g xn--unup4y xn--vermgensberater-ctb +util_rb_tld xn--11b4c3d 
xn--1qqw23a xn--30rr7y xn--3bst00m xn--3ds443g xn--3e0b707e +util_rb_tld xn--3pxu8k xn--42c2d9a xn--45brj9c xn--45q11c xn--4gbrim xn--55qw42g xn--55qx5d +util_rb_tld xn--6frz82g xn--6qq986b3xl xn--80adxhks xn--80ao21a xn--80asehdb xn--80aswg +util_rb_tld xn--90a3ac xn--90ais xn--9dbq2a xn--9et52u xn--b4w605ferd xn--c1avg xn--c2br7g +util_rb_tld xn--cg4bki xn--clchc0ea0b2g2a9gcd xn--czr694b xn--czrs0t xn--czru2d xn--d1acj3b +util_rb_tld xn--d1alf xn--eckvdtc9d xn--efvy88h xn--estv75g xn--fhbei xn--fiq228c5hs +util_rb_tld xn--fiq64b xn--fiqs8s xn--fiqz9s xn--fjq720a xn--flw351e xn--fpcrj9c3d +util_rb_tld xn--fzc2c9e2c xn--gecrj9c xn--h2brj9c xn--hxt814e xn--i1b6b1a6a2e xn--imr513n +util_rb_tld xn--io0a7i xn--j1aef xn--j1amh xn--j6w193g xn--jlq61u9w7b xn--kcrx77d1x4a +util_rb_tld xn--kprw13d xn--kpry57d xn--kpu716f xn--kput3i xn--l1acc xn--lgbbat1ad8j +util_rb_tld xn--mgb9awbf xn--mgba3a3ejt xn--mgba3a4f16a xn--mgbaam7a8h xn--mgbab2bd +util_rb_tld xn--mgbayh7gpa xn--mgbb9fbpob xn--mgbbh1a71e xn--mgbc0a9azcg xn--mgberp4a5d4ar +util_rb_tld xn--mgbpl2fh xn--mgbt3dhd xn--mgbtx2b xn--mgbx4cd0ab xn--mk1bu44c xn--mxtq1m +util_rb_tld xn--ngbc5azd xn--ngbe9e0a xn--node xn--nqv7f xn--nqv7fs00ema xn--nyqy26a +util_rb_tld xn--o3cw4h xn--ogbpf8fl xn--p1acf xn--p1ai xn--pbt977c xn--pgbs0dh xn--pssy2u +util_rb_tld xn--q9jyb4c xn--qcka1pmc xn--qxam xn--rhqv96g xn--s9brj9c xn--ses554g +util_rb_tld xn--t60b56a xn--tckwe xn--unup4y xn--vermgensberater-ctb util_rb_tld xn--vermgensberatung-pwb xn--vhquv xn--vuq861b xn--wgbh1c xn--wgbl6a -util_rb_tld xn--xhq521b xn--xkc2al3hye2a xn--xkc2dl3a5ee0h xn--yfro4i67o xn--ygbi2ammx -util_rb_tld xn--zfr164b +util_rb_tld xn--xhq521b xn--xkc2al3hye2a xn--xkc2dl3a5ee0h xn--y9a3aq xn--yfro4i67o +util_rb_tld xn--ygbi2ammx xn--zfr164b endif # Standard List # For an up to date list of TLDs that can be pasted into this block, run this command: -# wget http://data.iana.org/TLD/tlds-alpha-by-domain.txt -O - | tail -n+2 | grep -vi 'xn--' | tr '\n' ' ' | fold 
-w 80 -s | perl -e 's/^/util_rb_tld / && print lc while <>' && echo - -util_rb_tld abbott abogado ac academy accountant accountants active actor ad ads adult ae -util_rb_tld aero af afl ag agency ai airforce al allfinanz alsace am amsterdam an android -util_rb_tld ao apartments aq aquarelle ar archi army arpa as asia associates at attorney au -util_rb_tld auction audio autos aw ax axa az ba band bank bar barclaycard barclays bargains -util_rb_tld bauhaus bayern bb bbc bd be beer berlin best bf bg bh bi bid bike bingo bio biz -util_rb_tld bj black blackfriday bloomberg blue bm bmw bn bnpparibas bo boats bond boo -util_rb_tld boutique br brussels bs bt budapest build builders business buzz bv bw by bz -util_rb_tld bzh ca cab cafe cal camera camp cancerresearch canon capetown capital caravan -util_rb_tld cards care career careers cartier casa cash casino cat catering cbn cc cd -util_rb_tld center ceo cern cf cfd cg ch channel chat cheap chloe christmas chrome church -util_rb_tld ci citic city ck cl claims cleaning click clinic clothing club cm cn co coach -util_rb_tld codes coffee college cologne com community company computer condos construction -util_rb_tld consulting contractors cooking cool coop country courses cr credit creditcard -util_rb_tld cricket crs cruises cu cuisinella cv cw cx cy cymru cyou cz dabur dad dance -util_rb_tld date dating datsun day dclk de deals degree delivery democrat dental dentist -util_rb_tld desi design dev diamonds diet digital direct directory discount dj dk dm dnp do -util_rb_tld docs doha domains doosan download durban dvag dz eat ec edu education ee eg -util_rb_tld email emerck energy engineer engineering enterprises epson equipment er erni es -util_rb_tld esq estate et eu eurovision eus events everbank exchange expert exposed express -util_rb_tld fail faith fan fans farm fashion feedback fi film finance financial firmdale -util_rb_tld fish fishing fit fitness fj fk flights florist flowers flsmidth fly fm fo foo -util_rb_tld 
football forex forsale foundation fr frl frogans fund furniture futbol ga gal -util_rb_tld gallery garden gb gbiz gd gdn ge gent gf gg ggee gh gi gift gifts gives gl -util_rb_tld glass gle global globo gm gmail gmo gmx gn gold goldpoint golf goo goog google -util_rb_tld gop gov gp gq gr graphics gratis green gripe gs gt gu guge guide guitars guru -util_rb_tld gw gy hamburg hangout haus healthcare help here hermes hiphop hiv hk hm hn -util_rb_tld holdings holiday homes horse host hosting house how hr ht hu ibm id ie ifm il -util_rb_tld im immo immobilien in industries infiniti info ing ink institute insure int -util_rb_tld international investments io iq ir irish is it iwc java jcb je jetzt jm jo jobs -util_rb_tld joburg jp juegos kaufen kddi ke kg kh ki kim kitchen kiwi km kn koeln komatsu -util_rb_tld kp kr krd kred kw ky kyoto kz la lacaixa land lat latrobe lawyer lb lc lds -util_rb_tld lease leclerc legal lgbt li lidl life lighting limited limo link lk loan loans -util_rb_tld london lotte lotto love lr ls lt ltda lu luxe luxury lv ly ma madrid maif -util_rb_tld maison management mango market marketing markets marriott mc md me media meet -util_rb_tld melbourne meme memorial menu mg mh miami mil mini mk ml mm mma mn mo mobi moda -util_rb_tld moe monash money mormon mortgage moscow motorcycles mov movie mp mq mr ms mt -util_rb_tld mtn mtpc mu museum mv mw mx my mz na nagoya name navy nc ne net network neustar -util_rb_tld new news nexus nf ng ngo nhk ni nico ninja nissan nl no np nr nra nrw ntt nu -util_rb_tld nyc nz okinawa om one ong onl online ooo oracle org organic osaka otsuka ovh pa -util_rb_tld page panerai paris partners parts party pe pf pg ph pharmacy photo photography -util_rb_tld photos physio piaget pics pictet pictures pink pizza pk pl place plumbing plus -util_rb_tld pm pn pohl poker porn post pr praxi press pro prod productions prof properties -util_rb_tld property ps pt pub pw py qa qpon quebec racing re realtor recipes red redstone -util_rb_tld 
rehab reise reisen reit ren rentals repair report republican rest restaurant -util_rb_tld review reviews rich rio rip ro rocks rodeo rs rsvp ru ruhr rw ryukyu sa -util_rb_tld saarland sale samsung sap sarl saxo sb sc sca scb schmidt scholarships school -util_rb_tld schule schwarz science scot sd se services sew sexy sg sh shiksha shoes shriram -util_rb_tld si singles site sj sk sky sl sm sn so social software sohu solar solutions soy -util_rb_tld space spiegel spreadbetting sr st study style su sucks supplies supply support -util_rb_tld surf surgery suzuki sv sx sy sydney systems sz taipei tatar tattoo tax tc td -util_rb_tld tech technology tel temasek tennis tf tg th tickets tienda tips tires tirol tj -util_rb_tld tk tl tm tn to today tokyo tools top toshiba tours town toys tr trade trading -util_rb_tld training travel trust tt tui tv tw tz ua ug uk university uno uol us uy uz va -util_rb_tld vacations vc ve vegas ventures versicherung vet vg vi viajes video villas -util_rb_tld vision vlaanderen vn vodka vote voting voto voyage vu wales wang watch webcam -util_rb_tld website wed wedding wf whoswho wien wiki williamhill win wme work works world -util_rb_tld ws wtc wtf xin xxx xyz yachts yandex ye yodobashi yoga yokohama youtube yt za -util_rb_tld zip zm zone zuerich zw +# wget http://data.iana.org/TLD/tlds-alpha-by-domain.txt -q -O - | tail -n+2 | grep -vi '^xn--' | tr '\n' ' ' | fold -w 80 -s | perl -pe 'chomp; s/.*/util_rb_tld \L$_\n/' +util_rb_tld aaa aarp abarth abb abbott abbvie abc able abogado abudhabi ac academy +util_rb_tld accenture accountant accountants aco active actor ad adac ads adult ae aeg aero +util_rb_tld aetna af afamilycompany afl ag agakhan agency ai aig aigo airbus airforce +util_rb_tld airtel akdn al alfaromeo alibaba alipay allfinanz allstate ally alsace alstom +util_rb_tld am americanexpress americanfamily amex amfam amica amsterdam analytics android +util_rb_tld anquan anz ao aol apartments app apple aq aquarelle ar aramco archi army arpa 
+util_rb_tld art arte as asda asia associates at athleta attorney au auction audi audible +util_rb_tld audio auspost author auto autos avianca aw aws ax axa az azure ba baby baidu +util_rb_tld banamex bananarepublic band bank bar barcelona barclaycard barclays barefoot +util_rb_tld bargains baseball basketball bauhaus bayern bb bbc bbt bbva bcg bcn bd be beats +util_rb_tld beauty beer bentley berlin best bestbuy bet bf bg bh bharti bi bible bid bike +util_rb_tld bing bingo bio biz bj black blackfriday blanco blockbuster blog bloomberg blue +util_rb_tld bm bms bmw bn bnl bnpparibas bo boats boehringer bofa bom bond boo book booking +util_rb_tld boots bosch bostik bot boutique br bradesco bridgestone broadway broker brother +util_rb_tld brussels bs bt budapest bugatti build builders business buy buzz bv bw by bz +util_rb_tld bzh ca cab cafe cal call calvinklein cam camera camp cancerresearch canon +util_rb_tld capetown capital capitalone car caravan cards care career careers cars cartier +util_rb_tld casa case caseih cash casino cat catering cba cbn cbre cbs cc cd ceb center ceo +util_rb_tld cern cf cfa cfd cg ch chanel channel chase chat cheap chintai chloe christmas +util_rb_tld chrome chrysler church ci cipriani circle cisco citadel citi citic city +util_rb_tld cityeats ck cl claims cleaning click clinic clinique clothing cloud club +util_rb_tld clubmed cm cn co coach codes coffee college cologne com comcast commbank +util_rb_tld community company compare computer comsec condos construction consulting +util_rb_tld contact contractors cooking cookingchannel cool coop corsica country coupon +util_rb_tld coupons courses cr credit creditcard creditunion cricket crown crs cruises csc +util_rb_tld cu cuisinella cv cw cx cy cymru cyou cz dabur dad dance date dating datsun day +util_rb_tld dclk dds de deal dealer deals degree delivery dell deloitte delta democrat +util_rb_tld dental dentist desi design dev dhl diamonds diet digital direct directory +util_rb_tld discount 
discover dish diy dj dk dm dnp do docs doctor dodge dog doha domains +util_rb_tld dot download drive dtv dubai duck dunlop duns dupont durban dvag dvr dz earth +util_rb_tld eat ec eco edeka edu education ee eg email emerck energy engineer engineering +util_rb_tld enterprises epost epson equipment er ericsson erni es esq estate esurance et eu +util_rb_tld eurovision eus events everbank exchange expert exposed express extraspace fage +util_rb_tld fail fairwinds faith family fan fans farm farmers fashion fast fedex feedback +util_rb_tld ferrari ferrero fi fiat fidelity fido film final finance financial fire +util_rb_tld firestone firmdale fish fishing fit fitness fj fk flickr flights flir florist +util_rb_tld flowers fly fm fo foo foodnetwork football ford forex forsale forum foundation +util_rb_tld fox fr free fresenius frl frogans frontdoor frontier ftr fujitsu fujixerox fund +util_rb_tld furniture futbol fyi ga gal gallery gallo gallup game games gap garden gb gbiz +util_rb_tld gd gdn ge gea gent genting george gf gg ggee gh gi gift gifts gives giving gl +util_rb_tld glade glass gle global globo gm gmail gmbh gmo gmx gn godaddy gold goldpoint +util_rb_tld golf goo goodhands goodyear goog google gop got gov gp gq gr grainger graphics +util_rb_tld gratis green gripe group gs gt gu guardian gucci guge guide guitars guru gw gy +util_rb_tld hamburg hangout haus hbo hdfc hdfcbank health healthcare help helsinki here +util_rb_tld hermes hgtv hiphop hisamitsu hitachi hiv hk hkt hm hn hockey holdings holiday +util_rb_tld homedepot homegoods homes homesense honda honeywell horse host hosting hot +util_rb_tld hoteles hotmail house how hr hsbc ht htc hu hughes hyatt hyundai ibm icbc ice +util_rb_tld icu id ie ieee ifm iinet ikano il im imamat imdb immo immobilien in industries +util_rb_tld infiniti info ing ink institute insurance insure int intel international intuit +util_rb_tld investments io ipiranga iq ir irish is iselect ismaili ist istanbul it itau itv +util_rb_tld 
iveco iwc jaguar java jcb jcp je jeep jetzt jewelry jlc jll jm jmp jnj jo jobs +util_rb_tld joburg jot joy jp jpmorgan jprs juegos juniper kaufen kddi ke kerryhotels +util_rb_tld kerrylogistics kerryproperties kfh kg kh ki kia kim kinder kindle kitchen kiwi +util_rb_tld km kn koeln komatsu kosher kp kpmg kpn kr krd kred kuokgroup kw ky kyoto kz la +util_rb_tld lacaixa ladbrokes lamborghini lamer lancaster lancia lancome land landrover +util_rb_tld lanxess lasalle lat latino latrobe law lawyer lb lc lds lease leclerc lefrak +util_rb_tld legal lego lexus lgbt li liaison lidl life lifeinsurance lifestyle lighting +util_rb_tld like lilly limited limo lincoln linde link lipsy live living lixil lk loan +util_rb_tld loans locker locus loft lol london lotte lotto love lpl lplfinancial lr ls lt +util_rb_tld ltd ltda lu lundbeck lupin luxe luxury lv ly ma macys madrid maif maison makeup +util_rb_tld man management mango market marketing markets marriott marshalls maserati +util_rb_tld mattel mba mc mcd mcdonalds mckinsey md me med media meet melbourne meme +util_rb_tld memorial men menu meo metlife mg mh miami microsoft mil mini mint mit +util_rb_tld mitsubishi mk ml mlb mls mm mma mn mo mobi mobily moda moe moi mom monash money +util_rb_tld monster montblanc mopar mormon mortgage moscow motorcycles mov movie movistar +util_rb_tld mp mq mr ms msd mt mtn mtpc mtr mu museum mutual mutuelle mv mw mx my mz na nab +util_rb_tld nadex nagoya name nationwide natura navy nba nc ne nec net netbank netflix +util_rb_tld network neustar new newholland news next nextdirect nexus nf nfl ng ngo nhk ni +util_rb_tld nico nike nikon ninja nissan nissay nl no nokia northwesternmutual norton now +util_rb_tld nowruz nowtv np nr nra nrw ntt nu nyc nz obi observer off office okinawa olayan +util_rb_tld olayangroup oldnavy ollo om omega one ong onl online onyourside ooo open oracle +util_rb_tld orange org organic orientexpress origins osaka otsuka ott ovh pa page +util_rb_tld pamperedchef panasonic 
panerai paris pars partners parts party passagens pay +util_rb_tld pccw pe pet pf pfizer pg ph pharmacy philips photo photography photos physio +util_rb_tld piaget pics pictet pictures pid pin ping pink pioneer pizza pk pl place play +util_rb_tld playstation plumbing plus pm pn pnc pohl poker politie porn post pr pramerica +util_rb_tld praxi press prime pro prod productions prof progressive promo properties +util_rb_tld property protection pru prudential ps pt pub pw pwc py qa qpon quebec quest qvc +util_rb_tld racing radio raid re read realestate realtor realty recipes red redstone +util_rb_tld redumbrella rehab reise reisen reit ren rent rentals repair report republican +util_rb_tld rest restaurant review reviews rexroth rich richardli ricoh rightathome rio rip +util_rb_tld ro rocher rocks rodeo rogers room rs rsvp ru ruhr run rw rwe ryukyu sa saarland +util_rb_tld safe safety sakura sale salon samsclub samsung sandvik sandvikcoromant sanofi +util_rb_tld sap sapo sarl sas save saxo sb sbi sbs sc sca scb schaeffler schmidt +util_rb_tld scholarships school schule schwarz science scjohnson scor scot sd se seat +util_rb_tld secure security seek select sener services ses seven sew sex sexy sfr sg sh +util_rb_tld shangrila sharp shaw shell shia shiksha shoes shop shopping shouji show +util_rb_tld showtime shriram si silk sina singles site sj sk ski skin sky skype sl sling sm +util_rb_tld smart smile sn sncf so soccer social softbank software sohu solar solutions +util_rb_tld song sony soy space spiegel spot spreadbetting sr srl srt st stada staples star +util_rb_tld starhub statebank statefarm statoil stc stcgroup stockholm storage store stream +util_rb_tld studio study style su sucks supplies supply support surf surgery suzuki sv +util_rb_tld swatch swiftcover swiss sx sy sydney symantec systems sz tab taipei talk taobao +util_rb_tld target tatamotors tatar tattoo tax taxi tc tci td tdk team tech technology tel +util_rb_tld telecity telefonica temasek tennis teva tf 
tg th thd theater theatre tiaa +util_rb_tld tickets tienda tiffany tips tires tirol tj tjmaxx tjx tk tkmaxx tl tm tmall tn +util_rb_tld to today tokyo tools top toray toshiba total tours town toyota toys tr trade +util_rb_tld trading training travel travelchannel travelers travelersinsurance trust trv tt +util_rb_tld tube tui tunes tushu tv tvs tw tz ua ubank ubs uconnect ug uk unicom university +util_rb_tld uno uol ups us uy uz va vacations vana vanguard vc ve vegas ventures verisign +util_rb_tld versicherung vet vg vi viajes video vig viking villas vin vip virgin visa +util_rb_tld vision vista vistaprint viva vivo vlaanderen vn vodka volkswagen volvo vote +util_rb_tld voting voto voyage vu vuelos wales walmart walter wang wanggou warman watch +util_rb_tld watches weather weatherchannel webcam weber website wed wedding weibo weir wf +util_rb_tld whoswho wien wiki williamhill win windows wine winners wme wolterskluwer +util_rb_tld woodside work works world wow ws wtc wtf xbox xerox xfinity xihuan xin xperia +util_rb_tld xxx xyz yachts yahoo yamaxun yandex ye yodobashi yoga yokohama you youtube yt +util_rb_tld yun za zappos zara zero zip zippo zm zone zuerich zw # # 2nd level TLD list @@ -221,9 +276,10 @@ util_rb_2tld 2000.hu agrar.hu bolt.hu casino.hu city.hu co.hu erotica.hu erotika util_rb_2tld ac.id co.id go.id mil.id net.id or.id sch.id web.id util_rb_2tld gov.ie util_rb_2tld ac.il co.il gov.il idf.il k12.il muni.il net.il org.il -util_rb_2tld ac.im co.im gov.im net.im nic.im org.im +util_rb_2tld ac.im co.im com.im gov.im net.im nic.im org.im util_rb_2tld ac.in co.in edu.in ernet.in firm.in gen.in gov.in ind.in mil.in net.in nic.in org.in res.in util_rb_2tld com.io gov.io mil.io net.io org.io +util_rb_2tld gov.iq util_rb_2tld ac.ir co.ir gov.ir id.ir net.ir org.ir sch.ir util_rb_2tld edu.it gov.it util_rb_2tld ac.je co.je gov.je ind.je jersey.je ltd.je net.je org.je sch.je @@ -271,7 +327,7 @@ util_rb_2tld ac.ni biz.ni com.ni edu.ni gob.ni in.ni info.ni int.ni 
mil.ni net.n util_rb_2tld fhs.no folkebibl.no fylkesbibl.no herad.no idrett.no kommune.no mil.no museum.no priv.no stat.no tel.no vgs.no util_rb_2tld com.np edu.np gov.np mil.np net.np org.np util_rb_2tld biz.nr co.nr com.nr edu.nr fax.nr gov.nr info.nr mob.nr mobil.nr mobile.nr net.nr org.nr tel.nr tlf.nr -util_rb_2tld ac.nz co.nz cri.nz geek.nz gen.nz govt.nz iwi.nz maori.nz mil.nz net.nz org.nz school.nz +util_rb_2tld ac.nz co.nz cri.nz geek.nz gen.nz govt.nz iwi.nz kiwi.nz maori.nz mil.nz net.nz org.nz parliament.nz school.nz util_rb_2tld ac.om biz.om co.om com.om edu.om gov.om med.om mil.om mod.om museum.om net.om org.om pro.om sch.om util_rb_2tld dk.org eu.org util_rb_2tld abo.pa ac.pa com.pa edu.pa gob.pa ing.pa med.pa net.pa nom.pa org.pa sld.pa @@ -562,6 +618,9 @@ util_rb_2tld in.net util_rb_2tld com.de util_rb_2tld biz.ua util_rb_2tld azurewebsites.net +util_rb_2tld azureedge.net +util_rb_2tld zohosites.com +util_rb_2tld wixsite.com # util_rb_2tld neostrada.pl util_rb_2tld vv.cc @@ -571,6 +630,7 @@ util_rb_2tld shop.co util_rb_2tld tumblr.com util_rb_2tld fileave.com util_rb_2tld de.tl +util_rb_2tld co.com # Dyndns.com util_rb_2tld dyndns-at-home.com util_rb_2tld dyndns-at-work.com @@ -601,6 +661,15 @@ util_rb_2tld no-ip.com util_rb_2tld no-ip.info util_rb_2tld no-ip.net util_rb_2tld no-ip.org +# +util_rb_2tld gotdns.ch +util_rb_2tld ddnsking.com +util_rb_2tld ddns.net +util_rb_2tld bounceme.net +util_rb_2tld hopto.org +util_rb_2tld serveblog.net +util_rb_2tld myftp.org +util_rb_2tld myftp.biz # @@ -628,6 +697,8 @@ util_rb_3tld skydrive.live.com util_rb_3tld docs.google.com util_rb_3tld spaces.msn.com util_rb_3tld blog.friendster.com +util_rb_3tld plc.co.im +util_rb_3tld ltd.co.im util_rb_3tld sakura.ne.jp util_rb_3tld web.officelive.com util_rb_3tld com.sapo.pt diff --git rules/20_dnsbl_tests.cf rules/20_dnsbl_tests.cf index 5c66b6ca6..d7d1627fc 100644 --- rules/20_dnsbl_tests.cf +++ rules/20_dnsbl_tests.cf @@ -44,7 +44,8 @@ ifplugin 
Mail::SpamAssassin::Plugin::DNSEval # transfers: both axfr and ixfr available # URL: http://www.dnsbl.sorbs.net/ # pay-to-use: no -# delist: $50 fee for RCVD_IN_SORBS_SPAM, others have free retest on request +# There is no charge for removal from the proxy, vulnerability, relay, zombie, spam or DUHL databases. +# (as per http://www.sorbs.net/overview.shtml 08-2016) header __RCVD_IN_SORBS eval:check_rbl('sorbs', 'dnsbl.sorbs.net.') describe __RCVD_IN_SORBS SORBS: sender is listed in SORBS @@ -71,11 +72,10 @@ describe RCVD_IN_SORBS_SMTP SORBS: sender is open SMTP relay tflags RCVD_IN_SORBS_SMTP net reuse RCVD_IN_SORBS_SMTP -# delist: $50 fee -#header RCVD_IN_SORBS_SPAM eval:check_rbl_sub('sorbs', '127.0.0.6') -#describe RCVD_IN_SORBS_SPAM SORBS: sender is a spam source -#tflags RCVD_IN_SORBS_SPAM net -#reuse RCVD_IN_SORBS_SPAM RCVD_IN_SORBS_SPAM +header RCVD_IN_SORBS_SPAM eval:check_rbl_sub('sorbs', '127.0.0.6') +describe RCVD_IN_SORBS_SPAM SORBS: sender is a spam source +tflags RCVD_IN_SORBS_SPAM net +reuse RCVD_IN_SORBS_SPAM RCVD_IN_SORBS_SPAM header RCVD_IN_SORBS_WEB eval:check_rbl_sub('sorbs', '127.0.0.7') describe RCVD_IN_SORBS_WEB SORBS: sender is an abusable web server diff --git rules/20_drugs.cf rules/20_drugs.cf index 124f7603c..efffbb4da 100644 --- rules/20_drugs.cf +++ rules/20_drugs.cf @@ -37,7 +37,7 @@ require_version @@VERSION@@ # header rules # (only use sufficiently long drug name to make name unique) -header SUBJECT_DRUG_GAP_C Subject =~ /\bc.{0,2}i.{0,2}a.{0,2}l.{0,2}i.{0,2}s\b/i +header SUBJECT_DRUG_GAP_C Subject =~ /\bc[\sc]{0,2}i[\si]{0,2}a[\sa]{0,2}l[\sl]{0,2}i[\si]{0,2}s{1,3}\b/i describe SUBJECT_DRUG_GAP_C Subject contains a gappy version of 'cialis' header SUBJECT_DRUG_GAP_L Subject =~ /l.{0,2}e.{0,2}v.{0,2}i.{0,2}t.{0,2}r.{0,2}a/i diff --git rules/20_freemail.cf rules/20_freemail.cf index 2f536d506..93fca7330 100644 --- rules/20_freemail.cf +++ rules/20_freemail.cf @@ -27,7 +27,7 @@ ifplugin Mail::SpamAssassin::Plugin::FreeMail body 
__freemail_safe_fwd /---\s?(?:(?:Forwarded|Original) message|Alkuper.inen viesti)/i header __freemail_safe_rls X-Spam-Relays-External =~ /^[^\]]+ rdns=\S+\.(?:tfbnw\.net|ebay\.com|tieto\.com) / -meta __freemail_safe __freemail_safe_fwd || __ML2 || __ML4 || __HAS_X_MAILING_LIST || __freemail_safe_rls +meta __freemail_safe __freemail_safe_fwd || __ML2 || __ML4 || __HAS_X_MAILING_LIST || __HAS_X_MAILMAN_VERSION || __freemail_safe_rls header __freemail_replyto eval:check_freemail_replyto('replyto') meta FREEMAIL_REPLYTO __freemail_replyto && !__freemail_safe diff --git rules/20_freemail_domains.cf rules/20_freemail_domains.cf index 5ebafdf11..d301e5e63 100644 --- rules/20_freemail_domains.cf +++ rules/20_freemail_domains.cf @@ -31,7 +31,7 @@ # for (@F) { s/\./\\./g; s/\?/./g; s/\*/[^.]*/g; print } # }' rules/*.cf |grep -wvf- FILE -# Updated 2014-09-17-axb +# Updated 2016-08-18-axb ifplugin Mail::SpamAssassin::Plugin::FreeMail @@ -68,7 +68,7 @@ freemail_domains barlick.net beeebank.com beehive.org freemail_domains been-there.com beirut.com belizehome.com belizemail.net freemail_domains belizeweb.com bellsouth.net berlin.de bestmail.us bflomail.com freemail_domains bgnmail.com bharatmail.com big-orange.com bigboss.cz bigfoot.com bigger.com -freemail_domains bigmailbox.com bigmir.net bigstring.com bip.net +freemail_domains bigmailbox.com bigmir.net bigstring.com bip.net bigpond.com freemail_domains bitwiser.com biz.by bizhosting.com black-sea.ro blackburnmail.com freemail_domains blackglobalnetwork.net blink182.net blue.devils.com bluebottle.com freemail_domains bluemail.ch blumail.org blvds.com bol.com.br bolando.com @@ -155,7 +155,7 @@ freemail_domains frisurf.no fsmail.net fsnet.co.uk ftml.net fuelie.org freemail_domains fun-greetings-jokes.com fun.21cn.com fusemail.com fut.es gala.net freemail_domains galmail.co.za gamebox.net gamecocks.com gawab.com gay.com freemail_domains gaymailbox.com gaza.net gazeta.pl gci.net gdi.net geeklife.com gemari.or.id -freemail_domains 
genxemail.com geopia.com georgia.usa.com +freemail_domains genxemail.com geopia.com georgia.usa.com getmail.no freemail_domains ggaweb.ch giga4u.de gjk.dk glay.org glendale.net globalfree.it globomail.com freemail_domains globalpinoy.com globalsite.com.br globalum.com globetrotter.net gmail.com freemail_domains gmx.* go-bama.com go-cavs.com go-chargers.com go-dawgs.com go-gators.com @@ -291,7 +291,7 @@ freemail_domains post.expart.ne.jp post.pl post.sk posta.ge postaccesslite.com p freemail_domains postinbox.com postino.ch postino.it postmaster.co.uk postpro.net praize.com freemail_domains press.co.jp primposta.com printesamargareta.ro freemail_domains private.21cn.com probemail.com profesional.com profession.freemail.com.br -freemail_domains proinbox.com promessage.com prontomail.com +freemail_domains proinbox.com promessage.com prontomail.com protonmail.com protonmail.ch freemail_domains provincial.net publicaccounting.com punkass.com puppy.com.my freemail_domains qatar.io qlmail.com qq.com qrio.com qsl.net qudsmail.com queerplaces.com quepasa.com freemail_domains quick.cz quickwebmail.com r-o-o-t.com r320.hu raakim.com rbcmail.ru racingseat.com @@ -338,7 +338,7 @@ freemail_domains swissmail.net switzerland.org syom.com syriamail.com t-mail.com freemail_domains t2mail.com tabasheer.com talk21.com talkcity.com tangmonkey.com tatanova.com freemail_domains taxcutadvice.com techemail.com technisamail.co.za freemail_domains teenmail.co.uk teenmail.co.za tejary.com telebot.com telefonica.net -freemail_domains telegraf.by teleline.es telinco.net telkom.net telpage.net telstra.com +freemail_domains telegraf.by teleline.es telinco.net telkom.net telpage.net telstra.com telenet.be freemail_domains telusplanet.net tempting.com tenchiclub.com tennessee.usa.com terra.* freemail_domains terra.co*.* terrapins.com texas.usa.com texascrossroads.com tfz.net thai.com freemail_domains thaimail.com thaimail.net the-fastest.net the-quickest.com thegame.com diff --git 
rules/20_imageinfo.cf rules/20_imageinfo.cf index 1c2e218f2..d9e809a4f 100644 --- rules/20_imageinfo.cf +++ rules/20_imageinfo.cf @@ -11,9 +11,9 @@ # The ASF licenses this file to you under the Apache License, Version 2.0 # (the "License"); you may not use this file except in compliance with # the License. You may obtain a copy of the License at: -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -29,11 +29,11 @@ ifplugin Mail::SpamAssassin::Plugin::ImageInfo ## body DC_IMAGE001_GIF eval:image_named('image001.gif') ## describe DC_IMAGE001_GIF Contains image named image001.gif -## # you can do exact image size matches +## # you can do exact image size matches ## body DC_GIF_264_127 eval:image_size_exact('gif','264','127') ## describe DC_GIF_264_127 Found 264x127 pixel gif, possible pillz -# you can do image to text, or image to html ratios +# you can do image to text, or image to html ratios rawbody __DC_IMG_HTML_RATIO eval:image_to_text_ratio('all', '0.000', '0.015') describe __DC_IMG_HTML_RATIO Low rawbody to pixel area ratio @@ -52,7 +52,7 @@ describe __DC_IMG_TEXT_RATIO Low body to pixel area ratio # body __SCREEN_1024x768 eval:image_size_exact('all',1024,768) # body __SCREEN_1280x1024 eval:image_size_exact('all',1280,1024) # meta DC_SCREENSHOT_JPG ( __SCREEN_640x480 || __SCREEN_800x600 || __SCREEN_1024x768 || __SCREEN_1280x1024 ) -# describe DC_SCREENSHOT_JPG Contains inline image matching common screen resolution +# describe DC_SCREENSHOT_JPG Contains image matching common screen resolution # score DC_SCREENSHOT_JPG -0.01 # you can do minimum demension matches @@ -82,30 +82,30 @@ body __PNG_AREA_180K eval:pixel_coverage('png','180000','475000') # meta together something useful meta DC_GIF_UNO_LARGO ( __GIF_ATTACH_1 && __GIF_AREA_180K ) 
-describe DC_GIF_UNO_LARGO Message contains a single large inline gif +describe DC_GIF_UNO_LARGO Message contains a single large gif image meta __DC_GIF_MULTI_LARGO ( __GIF_ATTACH_2P && __GIF_AREA_180K ) describe __DC_GIF_MULTI_LARGO Message has 2+ inline gif covering lots of area meta DC_PNG_UNO_LARGO ( __PNG_ATTACH_1 && __PNG_AREA_180K ) -describe DC_PNG_UNO_LARGO Message contains a single large inline gif +describe DC_PNG_UNO_LARGO Message contains a single large png image meta __DC_PNG_MULTI_LARGO ( __PNG_ATTACH_2P && __PNG_AREA_180K ) -describe __DC_PNG_MULTI_LARGO Message has 2+ inline png covering lots of area +describe __DC_PNG_MULTI_LARGO Message has 2+ png images covering lots of area # meta DC_JPEG_UNO_LARGO ( __JPEG_ATTACH_1 && __JPEG_AREA_180K ) -# describe DC_JPEG_UNO_LARGO Message hash single large inline jpeg +# describe DC_JPEG_UNO_LARGO Message has a single large jpeg image # meta DC_JPEG_MULTI_LARGO ( __JPEG_ATTACH_2P && __JPEG_AREA_180K ) -# describe DC_JPEG_MULTI_LARGO Message has 2+ inline jpeg covering lots of area +# describe DC_JPEG_MULTI_LARGO Message has 2+ jpeg images covering lots of area -meta DC_IMAGE_SPAM_TEXT ( __DC_IMG_TEXT_RATIO && ( DC_GIF_UNO_LARGO || DC_PNG_UNO_LARGO || __DC_GIF_MULTI_LARGO || __DC_PNG_MULTI_LARGO )) +meta DC_IMAGE_SPAM_TEXT ( !__HAS_URI && __DC_IMG_TEXT_RATIO && ( DC_GIF_UNO_LARGO || DC_PNG_UNO_LARGO || __DC_GIF_MULTI_LARGO || __DC_PNG_MULTI_LARGO )) describe DC_IMAGE_SPAM_TEXT Possible Image-only spam with little text -# meta the stock rules together for HTML_IMAGE_ONLY_* +# meta the stock rules together for HTML_IMAGE_ONLY_* meta __HTML_IMG_ONLY ( HTML_IMAGE_ONLY_04 || HTML_IMAGE_ONLY_08 || HTML_IMAGE_ONLY_12 || HTML_IMAGE_ONLY_16 || HTML_IMAGE_ONLY_20 || HTML_IMAGE_ONLY_24 || HTML_IMAGE_ONLY_28 ) -meta DC_IMAGE_SPAM_HTML ( ( __HTML_IMG_ONLY || __DC_IMG_HTML_RATIO ) && ( DC_GIF_UNO_LARGO || DC_PNG_UNO_LARGO || __DC_GIF_MULTI_LARGO || __DC_PNG_MULTI_LARGO )) +meta DC_IMAGE_SPAM_HTML (!__HAS_URI && ( 
__HTML_IMG_ONLY || __DC_IMG_HTML_RATIO ) && ( DC_GIF_UNO_LARGO || DC_PNG_UNO_LARGO || __DC_GIF_MULTI_LARGO || __DC_PNG_MULTI_LARGO )) describe DC_IMAGE_SPAM_HTML Possible Image-only spam endif diff --git rules/20_phrases.cf rules/20_phrases.cf index 854961f3a..4606dfaca 100644 --- rules/20_phrases.cf +++ rules/20_phrases.cf @@ -82,15 +82,15 @@ describe IMPOTENCE Impotence cure - -body NA_DOLLARS /\b(?:\d{1,3})?Million\b.{0,40}\b(?:Canadian Dollar?s?|US\$|U\.? ?S\.? Dollar)/i -describe NA_DOLLARS Talks about a million North American dollars - -body US_DOLLARS_3 /(?:\$|usd).?\d{1,3}[,.]\d{3}[,.]\d{3}(?:[,.]\d\d)?/i -describe US_DOLLARS_3 Mentions millions of $ ($NN,NNN,NNN.NN) - -body MILLION_USD /Million\b.{0,40}\b(?:United States? Dollars?|USD)/i -describe MILLION_USD Talks about millions of dollars +#MOVED TO 20_rules_to_sandbox.cf - kmcgrail 2015-05-14 +#body NA_DOLLARS /\b(?:\d{1,3})?Million\b.{0,40}\b(?:Canadian Dollar?s?|US\$|U\.? ?S\.? Dollar)/i +#describe NA_DOLLARS Talks about a million North American dollars +# +#body US_DOLLARS_3 /(?:\$|usd).?\d{1,3}[,.]\d{3}[,.]\d{3}(?:[,.]\d\d)?/i +#describe US_DOLLARS_3 Mentions millions of $ ($NN,NNN,NNN.NN) +# +#body MILLION_USD /Million\b.{0,40}\b(?:United States? 
Dollars?|USD)/i +#describe MILLION_USD Talks about millions of dollars diff --git rules/20_ratware.cf rules/20_ratware.cf index b7ef53e9c..805c0a533 100644 --- rules/20_ratware.cf +++ rules/20_ratware.cf @@ -110,6 +110,7 @@ header __EUDORA_MUA X-Mailer =~ /^QUALCOMM Windows Eudora (?:Pro |Li header __EUDORA_MSGID MESSAGEID =~ /^<(?:\d\d?\.){3,5}\d{14}\.[a-f0-9]{8}\@\S+(?:\sport\s\d+)?>$/m header __HAS_X_LOOP exists:X-Loop header __HAS_X_MAILING_LIST exists:X-Mailing-List +header __HAS_X_MAILMAN_VERSION exists:X-Mailman-Version meta FORGED_MUA_EUDORA (__EUDORA_MUA && !__EUDORA_MSGID && !__UNUSABLE_MSGID && !__HAS_X_LOOP && !__HAS_X_MAILING_LIST) describe FORGED_MUA_EUDORA Forged mail pretending to be from Eudora diff --git rules/25_uribl.cf rules/25_uribl.cf index bc5a0292b..6e4c0c89a 100644 --- rules/25_uribl.cf +++ rules/25_uribl.cf @@ -111,11 +111,12 @@ endif ########################################################################### ## SURBL -urirhssub URIBL_SC_SURBL multi.surbl.org. A 2 -body URIBL_SC_SURBL eval:check_uridnsbl('URIBL_SC_SURBL') -describe URIBL_SC_SURBL Contains an URL listed in the SC SURBL blocklist -tflags URIBL_SC_SURBL net -reuse URIBL_SC_SURBL +#MERGED INTO BIT 64 per bug 7279 +#urirhssub URIBL_SC_SURBL multi.surbl.org. A 2 +#body URIBL_SC_SURBL eval:check_uridnsbl('URIBL_SC_SURBL') +#describe URIBL_SC_SURBL Contains an URL listed in the SC SURBL blocklist +#tflags URIBL_SC_SURBL net +#reuse URIBL_SC_SURBL urirhssub URIBL_WS_SURBL multi.surbl.org. A 4 body URIBL_WS_SURBL eval:check_uridnsbl('URIBL_WS_SURBL') @@ -131,21 +132,35 @@ reuse URIBL_PH_SURBL urirhssub URIBL_MW_SURBL multi.surbl.org. A 16 body URIBL_MW_SURBL eval:check_uridnsbl('URIBL_MW_SURBL') -describe URIBL_MW_SURBL Contains a Malware Domain or IP listed in the MW SURBL blocklist +describe URIBL_MW_SURBL Contains a URL listed in the MW SURBL blocklist tflags URIBL_MW_SURBL net reuse URIBL_MW_SURBL -urirhssub URIBL_AB_SURBL multi.surbl.org. 
A 32 -body URIBL_AB_SURBL eval:check_uridnsbl('URIBL_AB_SURBL') -describe URIBL_AB_SURBL Contains an URL listed in the AB SURBL blocklist -tflags URIBL_AB_SURBL net -reuse URIBL_AB_SURBL - -urirhssub URIBL_JP_SURBL multi.surbl.org. A 64 -body URIBL_JP_SURBL eval:check_uridnsbl('URIBL_JP_SURBL') -describe URIBL_JP_SURBL Contains an URL listed in the JP SURBL blocklist -tflags URIBL_JP_SURBL net -reuse URIBL_JP_SURBL +urirhssub URIBL_CR_SURBL multi.surbl.org. A 128 +body URIBL_CR_SURBL eval:check_uridnsbl('URIBL_CR_SURBL') +describe URIBL_CR_SURBL Contains an URL listed in the CR SURBL blocklist +tflags URIBL_CR_SURBL net +reuse URIBL_CR_SURBL + +#MERGED INTO BIT 64 per bug 7279 +#urirhssub URIBL_AB_SURBL multi.surbl.org. A 32 +#body URIBL_AB_SURBL eval:check_uridnsbl('URIBL_AB_SURBL') +#describe URIBL_AB_SURBL Contains an URL listed in the AB SURBL blocklist +#tflags URIBL_AB_SURBL net +#reuse URIBL_AB_SURBL + +#JP MOVED INTO ABUSE AS WELL AND BIT REUSED per bug 7279 +urirhssub URIBL_ABUSE_SURBL multi.surbl.org. A 64 +body URIBL_ABUSE_SURBL eval:check_uridnsbl('URIBL_ABUSE_SURBL') +describe URIBL_ABUSE_SURBL Contains an URL listed in the ABUSE SURBL blocklist +tflags URIBL_ABUSE_SURBL net +reuse URIBL_ABUSE_SURBL + +#SURBL BLOCK RULES - Bit 1 means your DNS has been blocked and this rule should be triggered to notify you. +urirhssub SURBL_BLOCKED multi.surbl.org. A 1 +body SURBL_BLOCKED eval:check_uridnsbl('SURBL_BLOCKED') +describe SURBL_BLOCKED ADMINISTRATOR NOTICE: The query to SURBL was blocked. See http://wiki.apache.org/spamassassin/DnsBlocklists\#dnsbl-block for more information. 
+tflags SURBL_BLOCKED net noautolearn ########################################################################### ## URIBL @@ -191,7 +206,7 @@ uridnsbl_skip_domain 126.com 163.com 2o7.net 4at1.com uridnsbl_skip_domain 5iantlavalamp.com about.com adelphia.net adobe.com addthis.com uridnsbl_skip_domain agora-inc.com agoramedia.com akamai.net uridnsbl_skip_domain akamaitech.net amazon.com ancestry.com aol.com -uridnsbl_skip_domain apache.org apple.com arcamax.com astrology.com +uridnsbl_skip_domain apache.org apple.com arcamax.com astrology.com apple.news uridnsbl_skip_domain atdmt.com att.net bbc.co.uk uridnsbl_skip_domain bcentral.com bellsouth.net bfi0.com uridnsbl_skip_domain bridgetrack.com cafe24.com charter.net @@ -236,6 +251,7 @@ uridnsbl_skip_domain yahoo.ca yahoo.co.kr yahoo.co.uk uridnsbl_skip_domain yahoo.com yahoo.com.br yahoogroups.com yimg.com uridnsbl_skip_domain yopi.de yoursite.com zdnet.com uridnsbl_skip_domain openxmlformats.org passport.com xmlsoap.org +uridnsbl_skip_domain abc.xyz avast.com # wtogami's most frequent known good URIDNSBL lookups (1/1/2011) uridnsbl_skip_domain alexa.com ask.com baidu.com bing.com craigslist.org @@ -249,6 +265,10 @@ uridnsbl_skip_domain fedex.com uridnsbl_skip_domain openoffice.org uridnsbl_skip_domain vk.com +# pointless footer noise +uridnsbl_skip_domain security.cloud +uridnsbl_skip_domain yac.mx + # Microsoft on ns1.msedge.net uridnsbl_skip_domain microsofttranslator.com office.com microsoftonline.com bing.com msedge.net diff --git rules/30_text_de.cf rules/30_text_de.cf index 8b29a5c7a..64b172e8f 100644 --- rules/30_text_de.cf +++ rules/30_text_de.cf @@ -259,9 +259,9 @@ lang de describe PREST_NON_ACCREDITED Kaufen Sie Studienabschl lang de describe BODY_ENHANCEMENT Informationen zur Penis-/Brustvergrerung lang de describe BODY_ENHANCEMENT2 Informationen zur Penis-/Brustvergrerung lang de describe IMPOTENCE Beseitigt Impotenz -lang de describe NA_DOLLARS Handelt von einer Million Dollar aus den US oder Kanada 
-lang de describe US_DOLLARS_3 Erwhnt Millonen von Dollar -lang de describe MILLION_USD Erwhnt Millonen von Dollar +#lang de describe NA_DOLLARS Handelt von einer Million Dollar aus den US oder Kanada +#lang de describe US_DOLLARS_3 Erwhnt Millonen von Dollar +#lang de describe MILLION_USD Erwhnt Millonen von Dollar lang de describe URG_BIZ Dringende Geschfte lang de describe MONEY_BACK Mit Geld-zurck Garantie lang de describe FREE_QUOTE_INSTANT Kostenlos ein schnelles Preisangebot, ohne Verpflichtung @@ -359,12 +359,12 @@ endif ifplugin Mail::SpamAssassin::Plugin::URIDNSBL lang de describe URIBL_SBL Enthlt URL in SBL-Liste (http://www.spamhaus.org/sbl/) -lang de describe URIBL_SC_SURBL Enthlt URL in SC-Liste (www.surbl.org) +#lang de describe URIBL_SC_SURBL Enthlt URL in SC-Liste (www.surbl.org) - removed bug 7279 lang de describe URIBL_WS_SURBL Enthlt URL in WS-Liste (www.surbl.org) lang de describe URIBL_PH_SURBL Enthlt URL in PH-Liste (www.surbl.org) #lang de describe URIBL_OB_SURBL Enthlt URL in OB-Liste (www.surbl.org) - REMOVED BUG 6853 -lang de describe URIBL_AB_SURBL Enthlt URL in AB-Liste (www.surbl.org) -lang de describe URIBL_JP_SURBL Enthlt URL in JP-Liste (www.surbl.org) +#lang de describe URIBL_AB_SURBL Enthlt URL in AB-Liste (www.surbl.org) - removed bug 7279 +lang de describe URIBL_ABUSE_SURBL Enthlt URL in ABUSE-Liste (www.surbl.org) - changed from JP to ABUSE bug 7279 endif ifplugin Mail::SpamAssassin::Plugin::AWL diff --git rules/30_text_fr.cf rules/30_text_fr.cf index 883a6f169..b956850d5 100644 --- rules/30_text_fr.cf +++ rules/30_text_fr.cf @@ -175,7 +175,7 @@ lang fr describe LIVE_PORN Pornographie probable: porno en direct-l lang fr describe SUBJECT_DIET Le sujet parle de perte de poids lang fr describe LOW_PRICE Contient "Lowest Price" (le prix le plus bas) lang fr describe MARKETING_PARTNERS Prtend que vous vous tes enregistr auprs d'un "partenaire" quelconque -lang fr describe MILLION_USD Phrase cl d'escroquerie nigrienne (millions of 
dollars) +#lang fr describe MILLION_USD Phrase cl d'escroquerie nigrienne (millions of dollars) lang fr describe __MIME_BASE64 Inclut un attachement en BASE64 lang fr describe MIME_BASE64_BLANKS Ligne blanches surnumraires dans l'encodage BASE64 lang fr describe MIME_BASE64_TEXT Texte du message camoufl par encodage en BASE64 @@ -193,7 +193,7 @@ lang fr describe MONEY_BACK Vous garantit un "remboursement si insatisfait" (en lang fr describe MSGID_FROM_MTA_HEADER Message-ID ajout par un relais lang fr describe MSGID_OUTLOOK_INVALID Message-ID falsifi (fortmat Outlook Express) lang fr describe MULTI_FORGED les en-ttes "Received" montrent de nombreuses falsifications -lang fr describe NA_DOLLARS Parle d'un million de dollars "nord-amricains" +#lang fr describe NA_DOLLARS Parle d'un million de dollars "nord-amricains" lang fr describe NONEXISTENT_CHARSET Message rdig dans un jeu de caractres inexistant lang fr describe NOT_ADVISOR Contient "Not registered investment advisor" lang fr describe NO_DNS_FOR_FROM Adresse From: inconnue en DNS (pas d'enregistrement MX) @@ -253,7 +253,7 @@ lang fr describe USER_IN_DEF_WHITELIST Exp lang fr describe USER_IN_MORE_SPAM_TO Destinataire sur la liste "more_spam_to" (config SA locale) lang fr describe USER_IN_WHITELIST Expditeur sur la liste blanche (OK) (config SA locale) lang fr describe USER_IN_WHITELIST_TO Destinataire sur la liste blanche (config SA) -lang fr describe US_DOLLARS_3 Escroq. nigrienne, version modifie, phrase cl ($NN,NNN,NNN.NN) +#lang fr describe US_DOLLARS_3 Escroq. 
nigrienne, version modifie, phrase cl ($NN,NNN,NNN.NN) lang fr describe DRUG_ED_ONLINE Vente de Viagra par correspondance lang fr describe WEIRD_PORT Lien HTTP vers un numro de port non standard lang fr describe WEIRD_QUOTING Utilisation trange de symboles de citations diff --git rules/30_text_nl.cf rules/30_text_nl.cf index df0ec7cf9..bd2808aaa 100644 --- rules/30_text_nl.cf +++ rules/30_text_nl.cf @@ -189,9 +189,9 @@ lang nl describe PREST_NON_ACCREDITED 'Prestigieuze "Non-Accredited" lang nl describe BODY_ENHANCEMENT Informatie over het vergroten van lichaamsdelen lang nl describe BODY_ENHANCEMENT2 Informatie over het vergroten van lichaamsdelen lang nl describe IMPOTENCE Medicijn voor impotentie -lang nl describe NA_DOLLARS Praat over een miljoen Noord-Amerikaanse dollars -lang nl describe US_DOLLARS_3 Vermeldt miljoenen $ ($NN,NNN,NNN.NN) -lang nl describe MILLION_USD Heeft het over miljoenen dollars +#lang nl describe NA_DOLLARS Praat over een miljoen Noord-Amerikaanse dollars +#lang nl describe US_DOLLARS_3 Vermeldt miljoenen $ ($NN,NNN,NNN.NN) +#lang nl describe MILLION_USD Heeft het over miljoenen dollars lang nl describe MONEY_BACK Niet-goed-geld-terug garantie lang nl describe FREE_QUOTE_INSTANT Gratis offerte lang nl describe BAD_CREDIT Los al uw leningen op diff --git rules/30_text_pl.cf rules/30_text_pl.cf index 58287bbd7..0c827fc89 100644 --- rules/30_text_pl.cf +++ rules/30_text_pl.cf @@ -162,7 +162,7 @@ lang pl describe KOREAN_UCE_SUBJECT Temat: zawiera korea lang pl describe LIVE_PORN Prawdopodobnie porno - Porno na ywo lang pl describe LOW_PRICE Najnisza cena lang pl describe MARKETING_PARTNERS Twierdzi, e jeste zarejestrowany jako partner -lang pl describe MILLION_USD O milionach dolarw +#lang pl describe MILLION_USD O milionach dolarw lang pl describe MIME_BASE64_BLANKS Dodatkowe puste linie kodowane w Base64 lang pl describe MIME_BASE64_TEXT Tekst wiadomoci zakamuflowany przy uyciu kodowania Base64 lang pl describe __MIME_BASE64 Zawiera 
zacznik kodowany w Base64 @@ -180,7 +180,7 @@ lang pl describe MONEY_BACK Gwarancja zwrotu pieni lang pl describe MSGID_FROM_MTA_HEADER Message-Id zosta dodany przez relay lang pl describe MSGID_OUTLOOK_INVALID Sfaszowany Message-Id (w formacie Outlook Express) lang pl describe MULTI_FORGED Nagwki Received: wykazuj wielokrotne faszowanie -lang pl describe NA_DOLLARS O milionie Pnocno Amerykaskich dolarw +#lang pl describe NA_DOLLARS O milionie Pnocno Amerykaskich dolarw lang pl describe NO_DNS_FOR_FROM Domena w nagwku Do: nie posiada wpisu w DNS (MX lub A) lang pl describe NO_MEDICAL Bez bada medycznych lang pl describe NONEXISTENT_CHARSET Nieznany zestaw znakw (jzyk). @@ -231,7 +231,7 @@ lang pl describe UNCLAIMED_MONEY (ludzie po prostu rozrzucaj lang pl describe UPPERCASE_50_75 Tre jest w 50-75% wielkimi literami lang pl describe UPPERCASE_75_100 Tre jest w 75-100% wielkimi literami lang pl describe URG_BIZ Pilna sprawa -lang pl describe US_DOLLARS_3 Wspomina miliony $ ($NN,NNN,NNN.NN) +#lang pl describe US_DOLLARS_3 Wspomina miliony $ ($NN,NNN,NNN.NN) lang pl describe USER_IN_ALL_SPAM_TO Uytkownik jest wymieniony w 'all_spam_to' lang pl describe USER_IN_BLACKLIST Od: zawiera adres z Twojej "czarnej listy" lang pl describe USER_IN_BLACKLIST_TO Uytkownik jest wymieniony w 'blacklist_to' diff --git rules/30_text_pt_br.cf rules/30_text_pt_br.cf index d21a2b2ac..21ae9efff 100644 --- rules/30_text_pt_br.cf +++ rules/30_text_pt_br.cf @@ -378,9 +378,9 @@ lang pt_BR describe PREST_NON_ACCREDITED Cont lang pt_BR describe BODY_ENHANCEMENT Informao sobre como aumentar partes do corpo lang pt_BR describe BODY_ENHANCEMENT2 Informao sobre como aumentar partes do corpo lang pt_BR describe IMPOTENCE Fala sobre cura da impotncia -lang pt_BR describe NA_DOLLARS Fala sobre milhes de dlares norte americanos ou canadenses -lang pt_BR describe US_DOLLARS_3 Contm $($NN,NNN,NNN.NN) -lang pt_BR describe MILLION_USD Fala sobre milhes de dlares +#lang pt_BR describe NA_DOLLARS Fala sobre 
milhes de dlares norte americanos ou canadenses +#lang pt_BR describe US_DOLLARS_3 Contm $($NN,NNN,NNN.NN) +#lang pt_BR describe MILLION_USD Fala sobre milhes de dlares lang pt_BR describe URG_BIZ Contm: "urgent matter" lang pt_BR describe MONEY_BACK Contm: "Money back guarantee" lang pt_BR describe FREE_QUOTE_INSTANT Contm: "Free express or no-obligation quote" @@ -598,12 +598,13 @@ endif lang pt_BR describe URIBL_SBL Contm uma URL listada na blacklist SBL lang pt_BR describe URIBL_DBL_SPAM Contm uma URL listada na blacklist DBL blocklist lang pt_BR describe URIBL_DBL_ERROR Erro: Consultou a DBL por um IP -lang pt_BR describe URIBL_SC_SURBL Contm uma URL listada na blacklist SC SURBL +#lang pt_BR describe URIBL_SC_SURBL Contm uma URL listada na blacklist SC SURBL - removed bug 7279 lang pt_BR describe URIBL_WS_SURBL Contm uma URL listada na blacklist WS SURBL lang pt_BR describe URIBL_PH_SURBL Contm uma URL listada na blacklist PH SURBL #lang pt_BR describe URIBL_OB_SURBL Contm uma URL listada na blacklist OB SURBL - REMOVED BUG 6853 -lang pt_BR describe URIBL_AB_SURBL Contm uma URL listada na blacklist AB SURBL -lang pt_BR describe URIBL_JP_SURBL Contm uma URL listada na blacklist JP SURBL +#lang pt_BR describe URIBL_AB_SURBL Contm uma URL listada na blacklist AB SURBL - removed bug 7279 +#Changed from JP to ABUSE per bug 7279 +lang pt_BR describe URIBL_ABUSE_SURBL Contm uma URL listada na blacklist ABUSE SURBL lang pt_BR describe URIBL_BLACK Contm uma URL listada na blacklist URIBL lang pt_BR describe URIBL_GREY Contm uma URL listada na greylist URIBL lang pt_BR describe URIBL_RED Contm uma URL listada na redlist URIBL diff --git rules/50_scores.cf rules/50_scores.cf index 4673740db..8b19e7591 100644 --- rules/50_scores.cf +++ rules/50_scores.cf @@ -221,7 +221,7 @@ score L_SPAM_TOOL_13 0.539 0.485 0.494 1.333 # n=2 score MALE_ENHANCE 3.100 3.099 3.099 0.851 score MARKETING_PARTNERS 0.553 0.235 0.689 0.001 score MID_DEGREES 0 # n=0 n=1 n=2 n=3 -score MILLION_USD 
3.799 2.477 3.221 3.247 +#score MILLION_USD 3.799 2.477 3.221 3.247 score MIME_BOUND_DD_DIGITS 3.016 0.349 2.417 1.373 score MIME_BOUND_DIGITS_15 0.432 1.225 1.241 0.798 score MIME_BOUND_EQ_REL 0 # n=0 n=1 n=2 n=3 @@ -245,7 +245,7 @@ score MSGID_SPAM_LETTERS 0 # n=0 n=1 n=2 n=3 score MSGID_YAHOO_CAPS 0.797 1.413 2.278 1.411 score MSOE_MID_WRONG_CASE 0.993 3.373 0.960 2.584 # n=2 score MULTI_FORGED 0 # n=0 n=1 n=2 n=3 -score NA_DOLLARS 3.599 +#score NA_DOLLARS 3.599 score NONEXISTENT_CHARSET 0 # n=0 n=1 n=2 n=3 score NORMAL_HTTP_TO_IP 0.159 0.001 0.795 0.001 score NOT_ADVISOR 0 # n=0 n=1 n=2 n=3 @@ -343,7 +343,7 @@ score TVD_FINGER_02 0.001 1.544 1.394 1.215 # n=2 score TVD_FLOAT_GENERAL 0 # n=0 n=1 n=2 n=3 score TVD_INCREASE_SIZE 1.529 0.601 1.055 0.001 # n=1 score TVD_LINK_SAVE 0 # n=0 n=1 n=2 n=3 -score TVD_PH_BODY_ACCOUNTS_PRE 1.201 1.527 1.327 2.393 # n=1 +score TVD_PH_BODY_ACCOUNTS_PRE 0.001 #changed to 0.001 due to .211 S/O on 2015-05-01 but left due to Meta Use - #1.201 1.527 1.327 2.393 # n=1 score TVD_PH_REC 3.127 2.026 3.266 1.784 # n=2 score TVD_PH_SEC 0.291 1.498 0.869 1.764 # n=1 score TVD_PP_PHISH 0 # n=0 n=1 n=2 n=3 @@ -373,7 +373,7 @@ score URI_NO_WWW_BIZ_CGI 2.399 2.399 2.400 2.399 # n=0 score URI_NO_WWW_INFO_CGI 2.299 2.299 0.292 2.071 score URI_OBFU_WWW 3.099 3.099 2.306 2.475 score URI_UNSUBSCRIBE 0 # n=0 n=1 n=2 n=3 -score US_DOLLARS_3 2.599 2.523 1.780 1.754 +#score US_DOLLARS_3 2.599 2.523 1.780 1.754 score VIA_GAP_GRA 0 # n=0 n=1 n=2 n=3 score WEIRD_PORT 0.001 0.001 0.097 0.001 score WEIRD_QUOTING 0.001 0.001 0.001 0.001 @@ -506,7 +506,8 @@ score RCVD_IN_SORBS_HTTP 0 2.499 0 0.001 # n=0 n=2 score RCVD_IN_SORBS_MISC 0 # n=0 n=1 n=2 n=3 score RCVD_IN_SORBS_SMTP 0 # n=0 n=1 n=2 n=3 score RCVD_IN_SORBS_SOCKS 0 2.443 0 1.927 # n=0 n=2 -score RCVD_IN_SORBS_WEB 0 0.614 0 0.770 # n=0 n=2 +score RCVD_IN_SORBS_SPAM 0 0.5 0 0.5 +score RCVD_IN_SORBS_WEB 0 1.5 0 1.5 score RCVD_IN_SORBS_ZOMBIE 0 # n=0 n=1 n=2 n=3 score RCVD_IN_XBL 0 0.724 0 0.375 # n=0 
n=2 score RCVD_IN_PBL 0 3.558 0 3.335 # n=0 n=2 @@ -786,15 +787,17 @@ endif # Mail::SpamAssassin::Plugin::SPF # URIDNSBL ifplugin Mail::SpamAssassin::Plugin::URIDNSBL # -score URIBL_AB_SURBL 0 4.499 0 4.499 # n=0 n=2 -score URIBL_JP_SURBL 0 1.948 0 1.250 # n=0 n=2 +#score URIBL_AB_SURBL 0 4.499 0 4.499 # n=0 n=2 - removed bug 7279 +#Changed below from JP to Abuse - bug 7279 +score URIBL_ABUSE_SURBL 0 1.948 0 1.250 # n=0 n=2 score URIBL_PH_SURBL 0 0.001 0 0.610 # n=0 n=2 score URIBL_RHS_DOB 0 0.276 0 1.514 # n=0 n=2 score URIBL_SBL 0 0.644 0 1.623 # n=0 n=2 score URIBL_SBL_A 0 0.1 0 0.1 -score URIBL_SC_SURBL 0 0.001 0 0.568 # n=0 n=2 +#score URIBL_SC_SURBL 0 0.001 0 0.568 # n=0 n=2 - removed bug 7279 score URIBL_WS_SURBL 0 1.659 0 1.608 # n=0 n=2 score URIBL_MW_SURBL 0 1.263 0 1.263 +score URIBL_CR_SURBL 0 1.263 0 1.263 score URIBL_BLACK 0 1.7 0 1.7 # n=0 n=2 score URIBL_GREY 0 1.084 0 0.424 # n=0 n=2 score URIBL_DBL_SPAM 0 2.5 0 2.5 diff --git rules/active.list rules/active.list index 953427b8b..ca2733def 100644 --- rules/active.list +++ rules/active.list @@ -1,8 +1,5 @@ # active ruleset list, automatically generated from http://ruleqa.spamassassin.org/ -# with results from: day 1: axb-8mile axb-coi-bulk axb-generic axb-ham-misc bb-guenther_fraud bb-jhardin bb-jhardin_fraud bb-jm bb-kmcgrail bb-traps bb-zmi bernie-fsf bernie-it_batt bernie-mix bpoliakoff darxus dwarren grenier jarif kam-ninja kpg-core mas-cps mas-mas zmi; day 2: axb-8mile axb-coi-bulk axb-generic axb-ham-misc bb-guenther_fraud bb-jhardin bb-jhardin_fraud bb-jm bb-kmcgrail bb-traps bb-zmi bernie-fsf bernie-it_batt bernie-mix bpoliakoff darxus dwarren grenier jarif kam-ninja kpg-core llanga mas-cps mas-mas mmiroslaw-ham mmiroslaw-spam zmi; day 3: axb-8mile axb-coi-bulk axb-generic axb-ham-misc bb-guenther_fraud bb-jhardin bb-jhardin_fraud bb-jm bb-kmcgrail bb-traps bb-zmi bernie-fsf bernie-it_batt bernie-mix bpoliakoff darxus dwarren grenier kam-ninja llanga mas-cps mas-mas mmiroslaw-ham 
mmiroslaw-spam zmi - -# good enough -ACCT_PHISHING +# with results from: day 1: axb-coi-bulk axb-generic axb-ham-misc axb-ninja darxus ena grenier jarif kgolding thendrikx zmi; day 2: axb-coi-bulk axb-generic axb-ham-misc axb-ninja bb-guenther_fraud darxus dwarren ena grenier jarif kgolding llanga thendrikx zmi; day 3: axb-coi-bulk axb-generic axb-ham-misc axb-ninja bb-kmcgrail bb-traps bb-zmi darxus dwarren ena grenier jarif kgolding llanga thendrikx zmi # tflags publish AC_BR_BONANZA @@ -43,15 +40,9 @@ AC_SPAMMY_URI_PATTERNS9 # tflags publish ADMAIL -# good enough -ADMITS_SPAM - # tflags publish ADVANCE_FEE_2_NEW_FORM -# good enough -ADVANCE_FEE_2_NEW_FRM_MNY - # tflags publish ADVANCE_FEE_2_NEW_MONEY @@ -61,9 +52,6 @@ ADVANCE_FEE_3_NEW # tflags publish ADVANCE_FEE_3_NEW_FORM -# good enough -ADVANCE_FEE_3_NEW_FRM_MNY - # tflags publish ADVANCE_FEE_3_NEW_MONEY @@ -71,23 +59,8 @@ ADVANCE_FEE_3_NEW_MONEY ADVANCE_FEE_4_NEW # good enough -ADVANCE_FEE_4_NEW_FRM_MNY - -# good enough -ADVANCE_FEE_4_NEW_MONEY - -# good enough ADVANCE_FEE_5_NEW -# good enough -ADVANCE_FEE_5_NEW_FORM - -# good enough -ADVANCE_FEE_5_NEW_FRM_MNY - -# good enough -ADVANCE_FEE_5_NEW_MONEY - # tflags publish AD_PREFS @@ -103,9 +76,6 @@ AXB_XMAILER_MIMEOLE_OL_1ECD5 # good enough AXB_XM_FORGED_OL2600 -# good enough -AXB_X_FF_SEZ_S - # tflags learn BAYES_00 @@ -137,12 +107,15 @@ BAYES_99 BAYES_999 # good enough -BIGNUM_EMAILS +BODY_EMPTY # tflags publish BODY_URI_ONLY # tflags publish +BOGUS_MSM_HDRS + +# tflags publish CANT_SEE_AD # tflags userconf @@ -163,17 +136,11 @@ CN_B2B_SPAMMER # tflags publish COMMENT_GIBBERISH -# good enough -COMPENSATION - # tflags publish CORRUPT_FROM_LINE_IN_HDRS # good enough -DEAR_BENEFICIARY - -# good enough -DEAR_EMAIL_USER +DATE_IN_FUTURE_96_Q # good enough DEAR_WINNER @@ -223,24 +190,21 @@ DOS_OE_TO_MX # good enough DOS_OUTLOOK_TO_MX -# good enough -DSN_NO_MIMEVERSION - # tflags publish DX_TEXT_02 # tflags publish DX_TEXT_03 -# good enough -DX_TEXT_05 - -# good 
enough -DYN_RDNS_SHORT_HELO_HTML +# tflags publish +ENCRYPTED_MESSAGE # tflags userconf ENV_AND_HDR_SPF_MATCH +# good enough +FAKE_REPLY_C + # tflags publish FBI_MONEY @@ -250,15 +214,6 @@ FBI_SPOOF # tflags publish FILL_THIS_FORM -# good enough -FILL_THIS_FORM_FRAUD_PHISH - -# good enough -FILL_THIS_FORM_LOAN - -# good enough -FILL_THIS_FORM_LONG - # tflags publish FORM_FRAUD @@ -296,21 +251,12 @@ FROM_LOCAL_NOVOWEL FROM_MISSPACED # good enough -FROM_MISSP_DYNIP - -# good enough -FROM_MISSP_EH_MATCH - -# good enough FROM_MISSP_FREEMAIL # good enough FROM_MISSP_MSFT # good enough -FROM_MISSP_PHISH - -# good enough FROM_MISSP_REPLYTO # tflags net @@ -325,23 +271,23 @@ FROM_MISSP_USER # good enough FROM_MISSP_XPRIO -# good enough -FSL_BOTSPAM_1 +# tflags publish +FROM_WORDY -# good enough -FSL_CTYPE_WIN1251 +# tflags publish +FROM_WORDY_SHORT # good enough -FSL_HELO_BARE_IP_2 +FSL_CTYPE_WIN1251 # good enough -FSL_HELO_DEVICE +FSL_HELO_BARE_IP_1 # good enough -FSL_HELO_FAKE +FSL_HELO_BARE_IP_2 # good enough -FSL_MID_419 +FSL_HELO_NON_FQDN_1 # tflags publish FSL_NEW_HELO_USER @@ -415,6 +361,9 @@ HASHCASH_2SPEND # tflags userconf HASHCASH_HIGH +# good enough +HDRS_LCASE + # tflags publish HEADER_FROM_DIFFERENT_DOMAINS @@ -428,74 +377,29 @@ HEADER_HOST_IN_WHITELIST HEAD_LONG # good enough -HELO_LH_HOME +HELO_LOCALHOST # good enough -HELO_LOCALHOST +HELO_MISC_IP # tflags publish HEXHASH_WORD # good enough -HK_LOTTO - -# good enough -HK_LOTTO_NAME - -# good enough -HK_NAME_DRUGS - -# good enough -HK_NAME_FM_MR_MRS - -# good enough -HK_NAME_FROM - -# good enough -HK_NAME_MR_MRS - -# good enough HK_RANDOM_FROM # good enough -HK_RANDOM_REPLYTO - -# good enough -HK_SCAM_N1 - -# good enough -HK_SCAM_N13 - -# good enough HK_SCAM_N15 # good enough HK_SCAM_N2 -# good enough -HK_SCAM_N3 - -# good enough -HK_SCAM_N8 - -# good enough -HK_SCAM_S15 - -# good enough -HK_SPAMMY_FILENAME - # tflags userconf HTML_CHARSET_FARAWAY # tflags publish HTML_OFF_PAGE -# good enough 
-KB_DATE_CONTAINS_TAB - -# good enough -KB_FAKED_THE_BAT - # tflags publish LIST_PRTL_PUMPDUMP @@ -508,33 +412,27 @@ LONG_HEX_URI # tflags publish LONG_IMG_URI +# good enough +LONG_TERM_PRICE + # tflags publish LOTS_OF_MONEY # good enough -LOTTERY_PH_004470 - -# good enough LOTTO_AGENT # good enough -LOTTO_AGENT_RPLY - -# good enough LOTTO_DEPT # tflags publish LUCRATIVE -# good enough -MAILER_EQ_ORG - -# good enough -MALFORMED_FREEMAIL - # tflags publish MANY_SPAN_IN_TEXT +# good enough +MIMEOLE_DIRECT_TO_MX + # tflags userconf MIME_CHARSET_FARAWAY @@ -544,18 +442,6 @@ MIME_NO_TEXT # tflags userconf MISSING_HB_SEP -# good enough -MONEY_ATM_CARD - -# good enough -MONEY_BARRISTER - -# good enough -MONEY_FORM - -# good enough -MONEY_FORM_SHORT - # tflags publish MONEY_FRAUD_3 @@ -566,20 +452,14 @@ MONEY_FRAUD_5 MONEY_FRAUD_8 # good enough -MONEY_FROM_41 - -# good enough -MONEY_FROM_MISSP +MSGID_NOFQDN1 -# good enough -MONEY_LOTTERY +# tflags publish +MSM_PRIO_REPTO # good enough MSOE_MID_WRONG_CASE -# good enough -NAME_EMAIL_DIFF - # tflags net NO_DNS_FOR_FROM @@ -595,20 +475,20 @@ NSL_RCVD_FROM_USER # good enough NSL_RCVD_HELO_USER -# good enough -NULL_IN_BODY - -# good enough -OBFU_ATTACH_MISSP - # tflags publish OBFU_JVSCR_ESC # tflags publish OBFU_TEXT_ATTACH -# good enough -PDS_FROM_2_EMAILS +# tflags publish +PHP_NOVER_MUA + +# tflags publish +PHP_ORIG_SCRIPT + +# tflags publish +PHP_SCRIPT_MUA # tflags publish PP_MIME_FAKE_ASCII_TEXT @@ -839,6 +719,9 @@ RCVD_IN_SORBS_SMTP RCVD_IN_SORBS_SOCKS # tflags net +RCVD_IN_SORBS_SPAM + +# tflags net RCVD_IN_SORBS_WEB # tflags net @@ -848,19 +731,13 @@ RCVD_IN_SORBS_ZOMBIE RCVD_IN_XBL # good enough -RDNS_LOCALHOST - -# good enough REPLYTO_WITHOUT_TO_CC # good enough -RISK_FREE +RP_MATCHES_RCVD # good enough -SERGIO_SUBJECT_PORN014 - -# good enough -SERGIO_SUBJECT_VIAGRA01 +SB_GIF_AND_NO_URIS # good enough SHARE_50_50 @@ -893,8 +770,20 @@ SPF_PASS SPF_SOFTFAIL # tflags publish +SPOOFED_FREEM_REPTO + +# tflags publish 
+SPOOFED_FREEM_REPTO_CHN + +# tflags publish +STATIC_XPRIO_OLE + +# tflags publish STOCK_LOW_CONTRAST +# tflags publish +STOCK_TIP + # good enough STOX_REPLY_TYPE @@ -910,12 +799,12 @@ SUBJECT_IN_BLACKLIST # tflags userconf SUBJECT_IN_WHITELIST +# tflags net +SURBL_BLOCKED + # tflags publish SYSADMIN -# good enough -TAB_IN_FROM - # tflags publish TEQF_USR_IMAGE @@ -959,46 +848,52 @@ TO_NO_BRKTS_NORDNS_HTML TO_NO_BRKTS_PCNT # good enough -TT_MSGID_TRUNC +TVD_SPACE_ENCODED # good enough -TVD_PH_BODY_META +TVD_SPACE_ENC_FM_MIME # good enough -TVD_QUAL_MEDS +TVD_SPACE_RATIO_MINFP + +# tflags publish +TW_GIBBERISH_MANY # good enough -TVD_RCVD_IP +ADMITS_SPAM # good enough -TVD_RCVD_IP4 +ADVANCE_FEE_5_NEW_FRM_MNY # good enough -TVD_SPACE_ENCODED +ADVANCE_FEE_5_NEW_MONEY # good enough -TVD_SPACE_ENC_FM_MIME +COMPENSATION # good enough -TVD_SPACE_RATIO_MINFP +FILL_THIS_FORM_LOAN # good enough -TVD_VISIT_PHARMA +FILL_THIS_FORM_LONG -# tflags publish -TW_GIBBERISH_MANY +# good enough +FROM_MISSP_EH_MATCH # good enough -DOS_OUTLOOK_TO_MX_IMAGE +HK_NAME_MR_MRS # good enough -PHP_NOVER_MUA +MONEY_BARRISTER # good enough -SHORTENED_URL_SRC +MONEY_FORM_SHORT + +# good enough +MONEY_FROM_MISSP # good enough -SUBJ_UNNEEDED_HTML +XFER_LOTSA_MONEY # tflags publish UC_GIBBERISH_OBFU @@ -1007,7 +902,7 @@ UC_GIBBERISH_OBFU UNPARSEABLE_RELAY # tflags net -URIBL_AB_SURBL +URIBL_ABUSE_SURBL # tflags net URIBL_BLACK @@ -1016,6 +911,9 @@ URIBL_BLACK URIBL_BLOCKED # tflags net +URIBL_CR_SURBL + +# tflags net URIBL_DBL_ABUSE_BOTCC # tflags net @@ -1049,9 +947,6 @@ URIBL_DBL_SPAM URIBL_GREY # tflags net -URIBL_JP_SURBL - -# tflags net URIBL_MW_SURBL # tflags net @@ -1070,12 +965,12 @@ URIBL_SBL URIBL_SBL_A # tflags net -URIBL_SC_SURBL - -# tflags net URIBL_WS_SURBL # tflags publish +URI_DATA + +# tflags publish URI_DQ_UNSUB # tflags publish @@ -1156,19 +1051,16 @@ USER_IN_WHITELIST_TO # tflags publish XM_PHPMAILER_FORGED -# good enough +# tflags publish XPRIO -# good enough -YOU_INHERIT +# 
tflags publish +XPRIO_SHORT_SUBJ # tflags net __DKIMDOMAIN_IN_DWL_ANY # tflags net -__FROM_MISSP_DKIM - -# tflags net __RCVD_IN_DNSWL # tflags net diff --git rulesrc/10_force_active.cf rulesrc/10_force_active.cf index d5180c849..1b0f21747 100644 --- rulesrc/10_force_active.cf +++ rulesrc/10_force_active.cf @@ -348,7 +348,7 @@ publish MALE_ENHANCE publish MARKETING_PARTNERS publish MICROSOFT_EXECUTABLE publish MID_DEGREES -publish MILLION_USD +#publish MILLION_USD publish MIME_BAD_ISO_CHARSET publish MIME_BASE64_BLANKS publish MIME_BASE64_TEXT @@ -387,7 +387,7 @@ publish MSGID_YAHOO_CAPS publish MSOE_MID_WRONG_CASE publish MULTI_FORGED publish MULTIPART_ALT_NON_TEXT -publish NA_DOLLARS +#publish NA_DOLLARS publish NML_ADSP_CUSTOM_HIGH publish NML_ADSP_CUSTOM_LOW publish NML_ADSP_CUSTOM_MED @@ -641,7 +641,7 @@ publish URI_NO_WWW_INFO_CGI publish URI_OBFU_WWW publish URI_TRUNCATED publish URI_UNSUBSCRIBE -publish US_DOLLARS_3 +#publish US_DOLLARS_3 publish USER_IN_ALL_SPAM_TO publish USER_IN_BLACKLIST publish USER_IN_BLACKLIST_TO diff --git rulesrc/sandbox/axb/20_axb_misc.cf rulesrc/sandbox/axb/20_axb_misc.cf index 2965f9557..a27e7cbcc 100644 --- rulesrc/sandbox/axb/20_axb_misc.cf +++ rulesrc/sandbox/axb/20_axb_misc.cf @@ -1,19 +1,15 @@ -ifplugin Mail::SpamAssassin::Plugin::URIDetail - uri_detail AXB_URIDETAIL_BROWSETHIS text =~ /You may browse this\./ - describe AXB_URIDETAIL_BROWSETHIS LLT fingerprint - tflags AXB_URIDETAIL_BROWSETHIS nopublish -endif +# 08/16/2016 +# 06/03/2016 +header AXB_XM_LORIS232 X-Mailer =~ /^Loris v2\.32\b/ +describe AXB_XM_LORIS232 Blast from the past? 
-uri AXB_URI_CDGB /\/\?cid\=gib/ -describe AXB_URI_CDGB Do not mistake with CBGB +# 9/21/2015 +header AXB_X_OUTLOOKPROT_ENVSDR Authentication-Results =~ /smtp\.mailfrom\=\<\>\;/ +describe AXB_X_OUTLOOKPROT_ENVSDR Outlook Protection trash trait - -header AXB_X_FF_SEZ_S X-Forefront-Antispam-Report =~ /\bSFV\:SPM\b/ -describe AXB_X_FF_SEZ_S Forefront sez this is spam - -rawbody AXB_RBDY_TENANDTEN /\&\#1088\;\&\#1092\;/ -describe AXB_RBDY_TENANDTEN Ten & Ten +header AXB_X_FF_SEZ_S X-Forefront-Antispam-Report =~ /\bSFV\:SPM\b/ +describe AXB_X_FF_SEZ_S Forefront sez this is spam # 2012-09-27 header __AXB_XM_OL_2600 X-Mailer =~ /Microsoft\ Outlook\ Express\ 6\.00\.2600\.0000/ diff --git rulesrc/sandbox/axb/23_bayes_ignore_header.cf rulesrc/sandbox/axb/23_bayes_ignore_header.cf index 6784f1e23..460f52f4b 100644 --- rulesrc/sandbox/axb/23_bayes_ignore_header.cf +++ rulesrc/sandbox/axb/23_bayes_ignore_header.cf @@ -1,18 +1,17 @@ -# Last update: 2015-02-18-axb +# Last update: 5/19/2016-axb # Set headers which may provide inappropriate AV/filter cues to the Bayesian classifier -bayes_ignore_header X_CMAE_Category bayes_ignore_header X-ACL-Warn bayes_ignore_header X-Alimail-AntiSpam bayes_ignore_header X-Amavis-Modified +bayes_ignore_header X-Anti-Spam +bayes_ignore_header X-Anti-Virus +bayes_ignore_header X-Anti-Virus-Version bayes_ignore_header X-AntiAbuse bayes_ignore_header X-Antispam -bayes_ignore_header X-Anti-Spam bayes_ignore_header X-Antivirus -bayes_ignore_header X-Anti-Virus bayes_ignore_header X-Antivirus-Code bayes_ignore_header X-Antivirus-Status bayes_ignore_header X-Antivirus-Version -bayes_ignore_header X-Anti-Virus-Version bayes_ignore_header x-aol-global-disposition bayes_ignore_header X-ASF-Spam-Status bayes_ignore_header X-ASG-Debug-ID @@ -23,9 +22,9 @@ bayes_ignore_header X-Assp-Version bayes_ignore_header X-Authority-Analysis bayes_ignore_header X-Authvirus bayes_ignore_header X-Auto-Response-Suppress -bayes_ignore_header x-avast-antispam 
bayes_ignore_header X-AV-Do-Run bayes_ignore_header X-AV-Status +bayes_ignore_header x-avast-antispam bayes_ignore_header X-Backend bayes_ignore_header X-Barracuda-Apparent-Source-IP bayes_ignore_header X-Barracuda-Bayes @@ -46,8 +45,8 @@ bayes_ignore_header X-Barracuda-Start-Time bayes_ignore_header X-Barracuda-UID bayes_ignore_header X-Barracuda-URL bayes_ignore_header X-Barracuda-Virus-Alert -bayes_ignore_header X-Bayesian-Result bayes_ignore_header X-Bayes-Prob +bayes_ignore_header X-Bayesian-Result bayes_ignore_header X-BitDefender-Spam bayes_ignore_header X-BitDefender-SpamStamp bayes_ignore_header X-BL @@ -57,20 +56,22 @@ bayes_ignore_header X-Brightmail-Tracker bayes_ignore_header X-BTI-AntiSpam bayes_ignore_header X-Bugzilla-Version bayes_ignore_header X-CanIt-Geo -bayes_ignore_header X-CanItPRO-Stream bayes_ignore_header X-Canit-Stats-ID +bayes_ignore_header X-CanItPRO-Stream bayes_ignore_header X-Clapf-spamicity bayes_ignore_header X-Cloud-Security +bayes_ignore_header X-CM-Score bayes_ignore_header X-CMAE-Analysis bayes_ignore_header X-CMAE-Match bayes_ignore_header X-CMAE-Score bayes_ignore_header X-CMAE-Verdict -bayes_ignore_header X-CM-Score bayes_ignore_header X-CNFS-Analysis +bayes_ignore_header X-Company bayes_ignore_header X-Coremail-Antispam bayes_ignore_header X-CRM114-CacheID bayes_ignore_header X-CRM114-Status bayes_ignore_header X-CRM114-Version +bayes_ignore_header X-CT-Spam bayes_ignore_header X-CTCH-SenderID bayes_ignore_header X-CTCH-SenderID-TotalBulk bayes_ignore_header X-CTCH-SenderID-TotalConfirmed @@ -81,7 +82,6 @@ bayes_ignore_header X-CTCH-SenderID-TotalSuspected bayes_ignore_header X-CTCH-SenderID-TotalVirus bayes_ignore_header X-CTCH-Spam bayes_ignore_header X-CTCH-VOD -bayes_ignore_header X-CT-Spam bayes_ignore_header X-Drweb-SpamState bayes_ignore_header X-DSPAM-Confidence bayes_ignore_header X-DSPAM-Factors @@ -97,6 +97,7 @@ bayes_ignore_header X-Enigmail-Version bayes_ignore_header X-EsetId bayes_ignore_header X-EsetResult 
bayes_ignore_header X-Exchange-Antispam-Report +bayes_ignore_header X-ExtloopSabreCommercials1 bayes_ignore_header X-EYOU-SPAMVALUE bayes_ignore_header X-FB-OUTBOUND-SPAM bayes_ignore_header X-FEAS-SBL @@ -121,9 +122,19 @@ bayes_ignore_header X-Ironport bayes_ignore_header X-IronPort-Anti-Spam-Filtered bayes_ignore_header X-IronPort-Anti-Spam-Result bayes_ignore_header X-IronPort-AV +bayes_ignore_header X-Ironport-HAT +bayes_ignore_header X-Ironport-HOSTNAME +bayes_ignore_header X-Ironport-LNR +bayes_ignore_header X-Ironport-MessageFilter +bayes_ignore_header X-Ironport-MFP +bayes_ignore_header X-Ironport-MID bayes_ignore_header X-IronPort-Outgoing-Antispam -bayes_ignore_header X-Junkmail +bayes_ignore_header X-Ironport-RIF +bayes_ignore_header X-Ironport-SBRS +bayes_ignore_header X-Ironport-SENDER +bayes_ignore_header X-Ironport-SUBJECT bayes_ignore_header X-Junk-Score +bayes_ignore_header X-Junkmail bayes_ignore_header X-KLMS-AntiPhishing bayes_ignore_header X-Klms-Antispam bayes_ignore_header X-KLMS-AntiSpam-Info @@ -147,6 +158,7 @@ bayes_ignore_header X-MailFoundry bayes_ignore_header X-MDMailLookup-Result bayes_ignore_header X-ME-Bayesian bayes_ignore_header X-ME-Content +bayes_ignore_header X-MessageFilter bayes_ignore_header X-Microsoft-Antispam bayes_ignore_header X-Mlf-Version bayes_ignore_header X-MXScan-AntiSpam @@ -170,8 +182,8 @@ bayes_ignore_header X-PerlMx-Virus-Scanned bayes_ignore_header X-PFSI-Info bayes_ignore_header X-PMX-Spam bayes_ignore_header X-PMX-Version -bayes_ignore_header X-policyd-weight bayes_ignore_header X-Policy-Service +bayes_ignore_header X-policyd-weight bayes_ignore_header X-PreRBLs bayes_ignore_header X-Probable-Spam bayes_ignore_header X-PROLinux-SpamCheck @@ -190,34 +202,35 @@ bayes_ignore_header X-SA-Exim-Version bayes_ignore_header X-Scanned-by bayes_ignore_header X-SmarterMail-CustomSpamHeader bayes_ignore_header X-Spam -bayes_ignore_header X-Spam_bar bayes_ignore_header X-Spam-Action bayes_ignore_header X-SPAM-AISP 
-bayes_ignore_header X-Spambayes-Classification bayes_ignore_header X-Spam-Check-By bayes_ignore_header X-Spam-Checker-Version bayes_ignore_header X-Spam-CMAE-Analysis bayes_ignore_header X-Spam-CMAESCORE bayes_ignore_header X-Spam-CTCH-RefID +bayes_ignore_header X-Spam-Flag +bayes_ignore_header X-Spam-Level +bayes_ignore_header X-Spam-Processed +bayes_ignore_header X-Spam-Report +bayes_ignore_header X-Spam-Scanned +bayes_ignore_header X-Spam-Score +bayes_ignore_header X-Spam-Score-Int +bayes_ignore_header X-Spam-SmartLearn +bayes_ignore_header X-Spam-Status +bayes_ignore_header X-Spam-Threshold +bayes_ignore_header X-Spam_bar +bayes_ignore_header X-Spambayes-Classification bayes_ignore_header X-SpamExperts-Domain bayes_ignore_header X-SpamExperts-Outgoing-Class bayes_ignore_header X-SpamExperts-Outgoing-Evidence bayes_ignore_header X-SpamExperts-Username bayes_ignore_header X-Spamfilter-host -bayes_ignore_header X-Spam-Flag bayes_ignore_header X-Spamina-Bogosity bayes_ignore_header X-Spamina-Spam-Report bayes_ignore_header X-Spamina-Spam-Score bayes_ignore_header X-SpamInfo -bayes_ignore_header X-Spam-Level -bayes_ignore_header X-Spam-Processed -bayes_ignore_header X-Spam-Report bayes_ignore_header X-Spamsave -bayes_ignore_header X-Spam-Scanned -bayes_ignore_header X-Spam-Score -bayes_ignore_header X-Spam-Score-Int -bayes_ignore_header X-Spam-SmartLearn -bayes_ignore_header X-Spam-Status bayes_ignore_header X-SpamTest-Group-ID bayes_ignore_header X-SpamTest-Info bayes_ignore_header X-SpamTest-Method @@ -225,12 +238,11 @@ bayes_ignore_header X-SpamTest-Rate bayes_ignore_header X-SpamTest-SPF bayes_ignore_header X-SpamTest-Status bayes_ignore_header X-SpamTest-Status-Extended -bayes_ignore_header X-Spam-Threshold bayes_ignore_header X-SPF-Scan-By bayes_ignore_header X-STA-Metric bayes_ignore_header X-STA-NotSpam -bayes_ignore_header X-StarScan-Version bayes_ignore_header X-STA-Spam +bayes_ignore_header X-StarScan-Version bayes_ignore_header X-SurGATE-Result 
bayes_ignore_header X-SWITCHham-Score bayes_ignore_header X-UI-Filterresults @@ -239,12 +251,12 @@ bayes_ignore_header X-UI-Out-Filterresults bayes_ignore_header X-Univie-Spam-Checker-Version bayes_ignore_header X-Univie-Virus-Scan bayes_ignore_header X-Virus -bayes_ignore_header X-VirusChecked bayes_ignore_header X-Virus-Checker-Version bayes_ignore_header X-Virus-Scanned bayes_ignore_header X-Virus-Scanner-Result bayes_ignore_header X-Virus-Scanner-Version bayes_ignore_header X-Virus-Status +bayes_ignore_header X-VirusChecked bayes_ignore_header X-VR-SCORE bayes_ignore_header X-VR-SPAMCAUSE bayes_ignore_header X-VR-STATUS @@ -255,3 +267,4 @@ bayes_ignore_header X-WatchGuard-Spam-ID bayes_ignore_header X-WatchGuard-Spam-Score bayes_ignore_header X-Whitelist-Domain bayes_ignore_header X-WUM-CCI +bayes_ignore_header X_CMAE_Category \ No newline at end of file diff --git rulesrc/sandbox/emailed/00_FVGT_File001.cf rulesrc/sandbox/emailed/00_FVGT_File001.cf index 560307f8e..bd894a228 100644 --- rulesrc/sandbox/emailed/00_FVGT_File001.cf +++ rulesrc/sandbox/emailed/00_FVGT_File001.cf @@ -3958,7 +3958,7 @@ ##counts FRT_STRONG2 9s/0h of 47019 corpus (37183s/9836h FVGT) 12/23/06 # # -##body FRT_SYMBOL /\b(?!symbol)/i +##body FRT_SYMBOL /\b(?!symboo?l)/i ##describe FRT_SYMBOL ReplaceTags: Symbol ###score FRT_SYMBOL 2.932 ##replace_rules FRT_SYMBOL @@ -4029,4 +4029,4 @@ #endif # Mail::SpamAssassin::Plugin::ReplaceTags # ## EOF -# \ No newline at end of file +# diff --git rulesrc/sandbox/felicity/70_other.cf rulesrc/sandbox/felicity/70_other.cf index 5bbd501d1..9283f03b5 100644 --- rulesrc/sandbox/felicity/70_other.cf +++ rulesrc/sandbox/felicity/70_other.cf @@ -54,7 +54,8 @@ body TVD_FUZZY_PHARMACEUTICAL /(?!pharmaceutical)

< replace_rules TVD_FUZZY_PHARMACEUTICAL describe TVD_FUZZY_PHARMACEUTICAL Obfuscation of the word "pharmaceutical" -body TVD_FUZZY_SYMBOL /(?!symbol)/i +# bug 7356 +body TVD_FUZZY_SYMBOL /(?!symboo?l)/i replace_rules TVD_FUZZY_SYMBOL describe TVD_FUZZY_SYMBOL Obfuscation of the word "symbol" diff --git rulesrc/sandbox/hstern/20_uri_tests.cf rulesrc/sandbox/hstern/20_uri_tests.cf index b92af1834..5f4651946 100644 --- rulesrc/sandbox/hstern/20_uri_tests.cf +++ rulesrc/sandbox/hstern/20_uri_tests.cf @@ -1,4 +1,4 @@ -########################################################################### + ########################################################################### # # <@LICENSE> # Licensed to the Apache Software Foundation (ASF) under one or more @@ -7,9 +7,9 @@ # The ASF licenses this file to you under the Apache License, Version 2.0 # (the "License"); you may not use this file except in compliance with # the License. You may obtain a copy of the License at: -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -29,14 +29,14 @@ redirector_pattern m'/(?:index.php)?\?.*(?<=[?&])URL=(.*?)(?:$|[&\#])'i # Google redirector. 
# Common form: -# http://www.google.com/url?sa=U&start=4&q=http://urlofspammer +# http://www.google.com/url?sa=U&start=4&q=http://urlofspammer # -> http://urlofspammer # Unhandled form: # http://www.google.com/url?q=http://urlofspammer/space&q=here # -> http://urlofspammer/space%20here # Redirector gets http://urlofspammer/space # http://www.google.com/url?q=http://urlof&q=spammer does not work -redirector_pattern m'^http:/*(?:\w+\.)?google(?:\.\w{2,3}){1,2}/url\?.*?(?<=[?&])q=(.*?)(?:$|[&\#])'i +redirector_pattern m'^https?:/*(?:\w+\.)?google(?:\.\w{2,3}){1,2}/url\?.*?(?<=[?&])q=(.*?)(?:$|[&\#])'i # Google site search # http://www.google.com/search?q=site:bluevallet.com @@ -44,29 +44,29 @@ redirector_pattern m'^http:/*(?:\w+\.)?google(?:\.\w{2,3}){1,2}/url\?.*?(?<=[?&] # Google inurl search # http://google.com//search?hl=en&q=inurl:rnyself.com%2Bvpxl%2Bmade%2Beasy&btnI=RC27 # -> searches for 'VXPL made easy' on rnyself.com -redirector_pattern m'^http:/*(?:\w+\.)?google(?:\.\w{2,3}){1,2}/search\?.*?(?<=[?&])q=[^&]*?(?<=%20|..[=+\s])(?:site|inurl):(.*?)(?:$|%20|[\s+&\#])'i +redirector_pattern m'^https?:/*(?:\w+\.)?google(?:\.\w{2,3}){1,2}/search\?.*?(?<=[?&])q=[^&]*?(?<=%20|..[=+\s])(?:site|inurl):(.*?)(?:$|%20|[\s+&\#])'i # Google search for pages that contain the site name # http://www.google.com/search?q="bluevallet.com" # http://www.google.com/search?q=%22bluevallet.com%22 # -> links to search page that probably has http://bluevallet.com # at the top -redirector_pattern m'^http:/*(?:\w+\.)?google(?:\.\w{2,3}){1,2}/search\?.*?(?<=[?&])q=[^&]*?(?<=%20|..[=+\s])(?:"|%22)(.*?)(?:$|%22|["\s+&\#])'i +redirector_pattern m'^https?:/*(?:\w+\.)?google(?:\.\w{2,3}){1,2}/search\?.*?(?<=[?&])q=[^&]*?(?<=%20|..[=+\s])(?:"|%22)(.*?)(?:$|%22|["\s+&\#])'i # Google translate # http://translate.google.com/translate?u=www.domain.tld&langpair=en%7Cen&hl=en # -> http://www.domain.tld inside a frame -redirector_pattern 
m'^http:/*(?:\w+\.)?google(?:\.\w{2,3}){1,2}/translate\?.*?(?<=[?&])u=(.*?)(?:$|[&\#])'i +redirector_pattern m'^https?:/*(?:\w+\.)?google(?:\.\w{2,3}){1,2}/translate\?.*?(?<=[?&])u=(.*?)(?:$|[&\#])'i # Google Ads # http://google.com/pagead/iclk?sa=l&ai=nightmare&num=399412020&adurl=http://quilarpe.com?375 # -> http://quilarpe.com?375 -redirector_pattern m'^http:/*(?:\w+\.)?google(?:\.\w{2,3}){1,2}/pagead/iclk\?.*?(?<=[?&])adurl=(.*?)(?:$|[&\#])'i +redirector_pattern m'^https?:/*(?:\w+\.)?google(?:\.\w{2,3}){1,2}/pagead/iclk\?.*?(?<=[?&])adurl=(.*?)(?:$|[&\#])'i # AOL redirector # http://aol.com/redir.adp?_url=http://www.ixp.jp/univac/ # -> http://www.ixp.jp/univac/ -redirector_pattern m'^http:/*(?:\w+\.)?aol\.com/redir\.adp\?.*(?<=[?&])_url=(.*?)(?:$|[&\#])'i +redirector_pattern m'^https?:/*(?:\w+\.)?aol\.com/redir\.adp\?.*(?<=[?&])_url=(.*?)(?:$|[&\#])'i # Facebook redirector # http://www.facebook.com/l/;www.example.com diff --git rulesrc/sandbox/jhardin/20_MIME_no_text.cf rulesrc/sandbox/jhardin/20_MIME_no_text.cf index 74ef6b644..cac92311a 100644 --- rulesrc/sandbox/jhardin/20_MIME_no_text.cf +++ rulesrc/sandbox/jhardin/20_MIME_no_text.cf @@ -13,7 +13,7 @@ ifplugin Mail::SpamAssassin::Plugin::MIMEHeader mimeheader __ANY_TEXT_ATTACH Content-Type =~ /text\/\w+/i meta __MIME_NO_TEXT (__CTYPE_MULTIPART_ANY && !__ANY_TEXT_ATTACH) - meta MIME_NO_TEXT __MIME_NO_TEXT && !ALL_TRUSTED && !__MSGID_APPLEMAIL && !__USER_AGENT_APPLEMAIL && !__HAS_IN_REPLY_TO && !__HAS_X_REF && !__HS_SUBJ_RE_FW && !__LCL__ENV_AND_HDR_FROM_MATCH + meta MIME_NO_TEXT __MIME_NO_TEXT && !__BOUNCE_CTYPE && !__CT_ENCRYPTED && !ALL_TRUSTED && !__MSGID_APPLEMAIL && !__USER_AGENT_APPLEMAIL && !__HAS_IN_REPLY_TO && !__HAS_X_REF && !__HS_SUBJ_RE_FW && !__LCL__ENV_AND_HDR_FROM_MATCH score MIME_NO_TEXT 2.00 # limit describe MIME_NO_TEXT No (properly identified) text body parts tflags MIME_NO_TEXT publish diff --git rulesrc/sandbox/jhardin/20_lotsa_money.cf rulesrc/sandbox/jhardin/20_lotsa_money.cf index 
b4ceb9625..8c54916e2 100644 --- rulesrc/sandbox/jhardin/20_lotsa_money.cf +++ rulesrc/sandbox/jhardin/20_lotsa_money.cf @@ -87,12 +87,12 @@ body __TO_YOUR_ORG /\b(?:to|for) your organi[sz]ation\b/i body __TRAVEL_ITINERARY /(?:travel|ticketed|your|current) itinerary/i body __AUTO_ACCIDENT /auto(?:mobile)? accident/i -body __LOTTO_AGENT_01 /\b(?:(?:(?:the|y?our)(?:\s\w{1,20})?|contact|accredited|listed)\sclaim(?:s|ing)?(?:\sprocessing)?|fiducia\w+|reimbursement|(?:prize|international|intl|foreign|win+ing)(?:[\s,.]+(?:rem+it+ance|settlement|payment|payout|award|transfer))+|payment|payout|immunity|(?=]{1,30}=[^"][^">]{1,30}[^=]"[\s>]/ -describe MISQ_HTML Unbalanced quotes in HTML tag -tflags MISQ_HTML nopublish +#rawbody MISQ_HTML /<\w{2,20}[^>=]{1,30}=[^"][^">]{1,30}[^=]"[\s>]/ +#describe MISQ_HTML Unbalanced quotes in HTML tag +#tflags MISQ_HTML nopublish # observed in bank phishing 09/2009 uri WIKI_IMG m,^https?://[^/]+wiki[mp]edia\.org/.+\.(?:png|gif|jpe?g),i @@ -463,8 +478,9 @@ describe TO_EQ_FM_HTML_ONLY To == From and HTML only #tflags TO_EQ_FM_HTML_ONLY publish meta __TO_EQ_FM_DIRECT_MX __TO_EQ_FROM && __DOS_DIRECT_TO_MX -meta TO_EQ_FM_DIRECT_MX __TO_EQ_FM_DIRECT_MX && !__THREAD_INDEX_GOOD && !__IS_EXCH +meta TO_EQ_FM_DIRECT_MX __TO_EQ_FM_DIRECT_MX && !__THREAD_INDEX_GOOD && !__IS_EXCH && !__CTYPE_MULTIPART_MIXED describe TO_EQ_FM_DIRECT_MX To == From and direct-to-MX +score TO_EQ_FM_DIRECT_MX 2.500 # limit #tflags TO_EQ_FM_DIRECT_MX publish # Why __HUSH_HUSH hits ham on this in masscheck I don't know. Legit bank emails maybe? 
@@ -609,7 +625,7 @@ body __STYLE_TAG_IN_BODY /]{0,30})?>/i body __BODY_XHTML //i if can(Mail::SpamAssassin::Conf::perl_min_version_5010000) # possessive {0,4}+ requires perl 5.10 or better - rawbody __STYLE_GIBBERISH_1 /]{0,40})?>(?:\s{0,100}(?!<\/style>)(?:(?:\/\*(?:\s|[^*<]|\*(?!\/)|<(?!\/style>)){0,200}\*\/)|\#[^{<]{1,50}\{[^}<]{4,100}\})){0,4}+(?:\s{0,100}(?!<\/style>|\/\*)[^\s:;,]){150}/im + rawbody __STYLE_GIBBERISH_1 /]{0,40})?>(?:\s{0,100}(?!<\/style>)(?:(?:\/\*(?:\s|[^*<]|\*(?!\/)|<(?!\/style>)){0,200}\*\/)|\#[^{<]{1,50}\{[^}<]{4,100}\})){0,4}+(?:\s{0,100}(?!<\/style>|\/\*)(?:\/{3,}\*|,{2,}|;{2,}|:{2,}|[^\s:;,])){150}/im else # older perl, can't deal with style comments properly rawbody __STYLE_GIBBERISH_1 /]{0,40})?>(?:\s{0,100}(?!<\/style>|\/\*)[^\s:;,]){150}/im @@ -652,10 +668,10 @@ tflags COMMENT_GIBBERISH publish #body LOTSA_EMAILS /\b(?:thousand|million)\se-?mail(?:\saddresse)?s?\b/i #tflags LOTSA_EMAILS nopublish -body __BIGNUM_EMAILS /\b(?:thousand|million|\d[,\d]{4,})\s(?:(?!and|or|your)\w+\s)?(?:e-?mail\saddresses|leads|names)\b/i +body __BIGNUM_EMAILS /\b(?:thousand|million|\d[,\d]{4,})\s(?:(?!and|or|your|place|baby)\w+\s)?(?:e-?mail\saddresses|leads|names)\b/i meta BIGNUM_EMAILS __BIGNUM_EMAILS && !__SPOOFED_URL && !__BUGGED_IMG describe BIGNUM_EMAILS Lots of email addresses/leads -score BIGNUM_EMAILS 3.00 # limti +score BIGNUM_EMAILS 3.00 # limit #tflags BIGNUM_EMAILS nopublish #rawbody __HTML_ELEM_OBFU /[a-z\s]&\#[91]\d\d?[a-z]/ @@ -726,10 +742,21 @@ describe IMAGESHACK_URI URI contains imageshack.us #meta DYNDNS_URIS __DYNDNS_URI > 1 #describe DYNDNS_URIS Has multiple dyndns.org URIs -uri __BITLY_URI /\/\/bit\.ly\//i -meta BITLY_URI __BITLY_URI && !__SUBSCRIPTION_INFO && !__HAS_ANY_EMAIL && !__HAS_REPLY_TO && !__UNSUB_LINK && !__RCD_RDNS_MAIL_MESSY && !__RP_MATCHES_RCVD && !__COMMENT_EXISTS && !__TO_NO_BRKTS_HTML_ONLY && !__NOT_SPOOFED -describe BITLY_URI URI contains bit.ly -score BITLY_URI 2.25 # limit + +## Does not perform better than 
URL_SHORTENER family +## the ones that misses are already scoring 7+ points +#uri __BITLY_URI /\/\/bit\.ly\//i +#meta BITLY_URI __BITLY_URI && !__HDR_CASE_REVERSED && !__HAS_SENDER && !__HAS_CAMPAIGNID && !__DOS_HAS_LIST_UNSUB && !__HAS_ERRORS_TO && !__MAIL_LINK && !__MSGID_JAVAMAIL && !__ENV_AND_HDR_FROM_MATCH && !__THREADED && !__USING_VERP1 && !__IMG_VIA_BITLY && !__URL_SHORTENER +#describe BITLY_URI URI contains bit.ly +#score BITLY_URI 3.000 # limit +#tflags BITLY_URI publish +# +## HTML image sourced via URL shortening service: +## +#rawbody __IMG_VIA_BITLY m;]+\ssrc\s*=\s*"?https?://(?:www\.)?bit\.ly/;i +#meta IMG_VIA_BITLY __IMG_VIA_BITLY && !SHORTENED_URL_SRC +#describe IMG_VIA_BITLY HTML image via URL shortener - URIBL avoidance? +#score IMG_VIA_BITLY 2.500 # limit uri __URI_OBFU_DOM /:\/\/(?:\w+\.)+(?:com|gov|net|org)(?:\.\w+){3,}\//i meta URI_OBFU_DOM __URI_OBFU_DOM && !__VIA_ML @@ -805,14 +832,15 @@ header __RPATH_12LTRDOM Return-Path =~ /\@[a-z]{12}\./ uri __URI_12LTRDOM m,://(?:[^./]+\.)*[a-z]{12}\.[^./]+/,i header __FROM_12LTRDOM_1 From =~ /\@(?!facebookmail)[a-z]{12}\./ -ifplugin Mail::SpamAssassin::Plugin::FreeMail - meta FROM_12LTRDOM __FROM_12LTRDOM_1 && !__VIA_ML && !__TO___LOWER && !__FS_SUBJ_RE && !__RCD_RDNS_MAIL_MESSY && !__freemail_safe && !__UNSUB_LINK && !NO_RELAYS && !__UNUSABLE_MSGID && !DATE_IN_PAST_96_XX && !ALL_TRUSTED && !__MSGID_APPLEMAIL && !__RCD_RDNS_SMTP_MESSY && !__FB_NATIONAL && !__MAIL_LINK && !__NAME_EMAIL_DIFF && !__RCD_RDNS_MX_MESSY && !__RCD_RDNS_MX && !__SENDER_BOT && !__IMS_MSGID && !__HS_SUBJ_RE_FW && !__DOS_HAS_LIST_UNSUB && !__THREAD_INDEX_GOOD && !__TO_EQ_FROM_DOM && !__URI_MAILTO && !__SUBSCRIPTION_INFO -else - meta FROM_12LTRDOM __FROM_12LTRDOM_1 && !__VIA_ML && !__TO___LOWER && !__FS_SUBJ_RE && !__RCD_RDNS_MAIL_MESSY && !__UNSUB_LINK && !NO_RELAYS && !__UNUSABLE_MSGID && !DATE_IN_PAST_96_XX && !ALL_TRUSTED && !__MSGID_APPLEMAIL && !__RCD_RDNS_SMTP_MESSY && !__FB_NATIONAL && !__MAIL_LINK && !__NAME_EMAIL_DIFF && 
!__RCD_RDNS_MX_MESSY && !__RCD_RDNS_MX && !__SENDER_BOT && !__IMS_MSGID && !__HS_SUBJ_RE_FW && !__DOS_HAS_LIST_UNSUB && !__THREAD_INDEX_GOOD && !__TO_EQ_FROM_DOM && !__URI_MAILTO && !__SUBSCRIPTION_INFO -endif -describe FROM_12LTRDOM From a 12-letter domain -#tflags FROM_12LTRDOM nopublish -score FROM_12LTRDOM 0.10 # limit +## suppress this, masscheck is publishing it as a T_ rule and ignoring the score limit, so hits get 1 point +#ifplugin Mail::SpamAssassin::Plugin::FreeMail +# meta FROM_12LTRDOM __FROM_12LTRDOM_1 && !__VIA_ML && !__TO___LOWER && !__FS_SUBJ_RE && !__RCD_RDNS_MAIL_MESSY && !__freemail_safe && !__UNSUB_LINK && !NO_RELAYS && !__UNUSABLE_MSGID && !DATE_IN_PAST_96_XX && !ALL_TRUSTED && !__MSGID_APPLEMAIL && !__RCD_RDNS_SMTP_MESSY && !__FB_NATIONAL && !__MAIL_LINK && !__NAME_EMAIL_DIFF && !__RCD_RDNS_MX_MESSY && !__RCD_RDNS_MX && !__SENDER_BOT && !__IMS_MSGID && !__HS_SUBJ_RE_FW && !__DOS_HAS_LIST_UNSUB && !__THREAD_INDEX_GOOD && !__TO_EQ_FROM_DOM && !__URI_MAILTO && !__SUBSCRIPTION_INFO +#else +# meta FROM_12LTRDOM __FROM_12LTRDOM_1 && !__VIA_ML && !__TO___LOWER && !__FS_SUBJ_RE && !__RCD_RDNS_MAIL_MESSY && !__UNSUB_LINK && !NO_RELAYS && !__UNUSABLE_MSGID && !DATE_IN_PAST_96_XX && !ALL_TRUSTED && !__MSGID_APPLEMAIL && !__RCD_RDNS_SMTP_MESSY && !__FB_NATIONAL && !__MAIL_LINK && !__NAME_EMAIL_DIFF && !__RCD_RDNS_MX_MESSY && !__RCD_RDNS_MX && !__SENDER_BOT && !__IMS_MSGID && !__HS_SUBJ_RE_FW && !__DOS_HAS_LIST_UNSUB && !__THREAD_INDEX_GOOD && !__TO_EQ_FROM_DOM && !__URI_MAILTO && !__SUBSCRIPTION_INFO +#endif +#describe FROM_12LTRDOM From a 12-letter domain +##tflags FROM_12LTRDOM nopublish +#score FROM_12LTRDOM 0.10 # limit # promising masscheck results meta __MONEY_12LTRDOM __FROM_12LTRDOM_1 && __LOTSA_MONEY_00 @@ -864,11 +892,9 @@ describe __UCOZ_URI URI contains ucoz.org #body __ARTHUR_SIMMONS /Arthur Simmons/ #body __INTRUST_DOMS /In[Tt]rust Domains/ #meta ARTHUR_INTRUST __ARTHUR_SIMMONS && __INTRUST_DOMS -#score ARTHUR_INTRUST 4.5 #describe 
ARTHUR_INTRUST Arthur Simmons - registrar spammer extraordinaire #header ART_NAMES_ORG Received =~ /\bart\.names\.org\b/i -#score ART_NAMES_ORG 4.0 #describe ART_NAMES_ORG Arthur Simmons - registrar spammer extraordinaire if can(Mail::SpamAssassin::Conf::feature_bug6558_free) @@ -1003,9 +1029,10 @@ meta GAPPY_HTML __GAPPY_HTML && !__UNSUB_LINK && !__RP_MATCHE describe GAPPY_HTML HTML body with much useless whitespace # Try to improve S/O per bug 6119 -meta TVD_SPACE_RATIO_MINFP __TVD_SPACE_RATIO && !__LCL__ENV_AND_HDR_FROM_MATCH && !__SUBSCRIPTION_INFO && !__RCD_RDNS_MAIL && !__SUBJECT_ENCODED_QP && !__THREADED && !__TO_EQ_FROM_DOM && !__BOTH_INR_AND_REF && !__X_CRON_ENV && !__HAS_THREAD_INDEX && !__HDRS_LCASE_KNOWN && !__ISO_2022_JP_DELIM +meta TVD_SPACE_RATIO_MINFP __TVD_SPACE_RATIO && !__CT_ENCRYPTED && !__LCL__ENV_AND_HDR_FROM_MATCH && !__SUBSCRIPTION_INFO && !__RCD_RDNS_MAIL && !__SUBJECT_ENCODED_QP && !__THREADED && !__TO_EQ_FROM_DOM && !__BOTH_INR_AND_REF && !__X_CRON_ENV && !__HAS_THREAD_INDEX && !__HDRS_LCASE_KNOWN && !__ISO_2022_JP_DELIM && !__DOS_HAS_LIST_UNSUB && !__RP_MATCHES_RCVD #tflags TVD_SPACE_RATIO_MINFP nopublish -score TVD_SPACE_RATIO_MINFP 2.750 # limit +score TVD_SPACE_RATIO_MINFP 2.500 # limit +describe TVD_SPACE_RATIO_MINFP Space ratio # Only useful for English-language email #meta SUBJECT_UNNEEDED_ENCODING (__SUBJECT_ENCODED_B64 && !__SUBJECT_UTF8_B_ENCODED) && !__RCD_RDNS_MAIL && !__LCL__ENV_AND_HDR_FROM_MATCH && !__SUBSCRIPTION_INFO && !__THREADED && !__NONBOUNCE_READ_RECEIPT @@ -1017,9 +1044,11 @@ score TVD_SPACE_RATIO_MINFP 2.750 # limit meta __TVD_SPACE_ENCODED (__TVD_SPACE_RATIO && __SUBJECT_ENCODED_B64 && !__SUBJECT_UTF8_B_ENCODED) meta TVD_SPACE_ENCODED __TVD_SPACE_ENCODED && !__NOT_SPOOFED && !__VIA_ML && !__HS_SUBJ_RE_FW && !__SUBSCRIPTION_INFO && !__TO_EQ_FROM_DOM && !__RCD_RDNS_MAIL && !__ISO_2022_JP_DELIM score TVD_SPACE_ENCODED 2.500 # limit +describe TVD_SPACE_ENCODED Space ratio & encoded subject meta TVD_SPACE_ENC_FM_MIME 
__TVD_SPACE_ENCODED && __FROM_NEEDS_MIME && !__ISO_2022_JP_DELIM score TVD_SPACE_ENC_FM_MIME 2.000 # limit +describe TVD_SPACE_ENC_FM_MIME Space ratio & encoded subject & MIME needed # sample from users list: Subject: Sta ffWork sFastToSen dTab le tsGood s @@ -1138,10 +1167,11 @@ describe URI_MALWARE_BH Possible BlackHole malware links / phishing score URI_MALWARE_BH 1.0 # limit # suggested by https://isc.sans.edu/diary.html?storyid=13996 -uri __URI_DATA /^data:[a-z]/i -meta URI_DATA __URI_DATA && !ALL_TRUSTED +uri __URI_DATA /^data:(?!image\/)[a-z]/i +meta URI_DATA __URI_DATA && !ALL_TRUSTED && !__RCD_RDNS_MAIL_MESSY && !__HAS_ERRORS_TO && !__VIA_ML && !__ENV_AND_HDR_FROM_MATCH && !__DOS_HAS_LIST_UNSUB describe URI_DATA "data:" URI - possible malware or phish -score URI_DATA 1.0 # limit +score URI_DATA 3.250 # limit +tflags URI_DATA publish header __SUBJ_ATTENTION Subject =~ /ATTENTION/ @@ -1190,13 +1220,14 @@ header __FROM_WESTERNUNION From:addr =~ /westernunion\.com$/i meta __FROM_MISSP_PHISH __FROM_MISSPACED && (__FROM_ASB_BANK || __FROM_AMEX || __FROM_BANK_LOOSE || __FROM_CHASE || __FROM_CMNWLTH_BANK || __FROM_EBAY_LOOSE || __FROM_HSBC || __FROM_LLOYDSTSB || __FROM_PAYPAL_LOOSE || __FROM_WELLSFARGO || __FROM_WESTERNUNION) meta FROM_MISSP_PHISH __FROM_MISSP_PHISH describe FROM_MISSP_PHISH Malformed, claims to be from financial organization - possible phish -score FROM_MISSP_PHISH 4.75 # limit +score FROM_MISSP_PHISH 3.500 # limit # another upload-a-document-for-public-access site uri __URI_YOUSENDIT m,^https?://www\.yousendit\.com/directdownload,i # see also DOS_GOOGLE_DOCS uri __URI_GOOGLE_DOC m,^https?://docs\.google\.com/(?:[^/]+/)*view(?:form)?\?(?:id|formkey)=,i +uri __URI_GOOGLE_DRV m,^https?://googledrive\.com/,i body __WEBMAIL_ACCT /\byour web ?mail account/i body __MAILBOX_FULL /\b(?:you(?:r (?:mail\s?box|(?:e-?|web ?)mail))? (?:is (?:almost )?full|(?:quota )?ha(?:s|ve) (?:reached|exceeded|passed) (?:the|your|it'?s?) 
(?:university )?(?:size|storage|set|(?:e-?|web ?)mail|quota|folder|mail ?box)[\/\s](?:limit |quota |account )+)|over your mail\s?box (?:size )?(?:limit|quota)|maximum mail\s?box (?:size )?(?:limit|quota) exceeded|sua (?:conta|caixa) de (?:(?:e-?|web ?)mail|correio) (?:excedeu (?:sua|o) limite|est(?:=E1|[\xe1]|[\xc3][\xa1]) quase cheio))\b/i @@ -1291,7 +1322,7 @@ meta __EMPTY_BODY __BODY_TEXT_LINE < 2 && !__SMIME_MESSAGE # this hits 13% of masscheck corpus spam, 50% of that only scores 2 points meta BODY_EMPTY __EMPTY_BODY && !__NUMBERS_IN_SUBJ && !__CTE && !__RP_MATCHES_RCVD && !__VIA_ML && !__MIME_ATTACHMENT && !__HAS_THREAD_INDEX && !__TO_EQ_FROM_DOM && !__LCL__ENV_AND_HDR_FROM_MATCH && !__FROM_LOWER && !__NOT_SPOOFED && !__MSGID_APPLEMAIL && !__RCD_RDNS_MAIL_MESSY && !NO_RELAYS && !__NOT_A_PERSON describe BODY_EMPTY No body text in message -score BODY_EMPTY 3.00 # limit +score BODY_EMPTY 2.00 # limit meta __BODY_URI_ONLY __BODY_TEXT_LINE < 3 && __HAS_ANY_URI && !__SMIME_MESSAGE @@ -1307,11 +1338,11 @@ header __SINGLE_WORD_SUBJ Subject =~ /^\s*\S{1,60}\s*$/ meta __BODY_SINGLE_WORD __BODY_TEXT_LINE < 3 && !__EMPTY_BODY && !__SMIME_MESSAGE && ((__SINGLE_WORD_LINE && !__SINGLE_WORD_SUBJ) || __SINGLE_WORD_LINE > 1) meta BODY_SINGLE_WORD __BODY_SINGLE_WORD && !ALL_TRUSTED && !__HDRS_LCASE_KNOWN && !__FROM_ALL_NUMS && !__RCD_RDNS_SMTP describe BODY_SINGLE_WORD Message body is only one word (no spaces) -score BODY_SINGLE_WORD 3.00 # limit +score BODY_SINGLE_WORD 2.500 # limit meta BODY_SINGLE_URI (__BODY_SINGLE_WORD && __HAS_ANY_URI) && !ALL_TRUSTED && !__HDRS_LCASE_KNOWN && !__FROM_ALL_NUMS && !__RCD_RDNS_SMTP describe BODY_SINGLE_URI Message body is only a URI -score BODY_SINGLE_URI 3.00 # limit +score BODY_SINGLE_URI 2.500 # limit #ifplugin Mail::SpamAssassin::Plugin::DKIM # # malformed DKIM signatures seen in the wild - see bug#6895 @@ -1339,7 +1370,7 @@ uri __URI_DOS_FILE /^[A-Z]:\\/i meta __FORM_LOW_CONTRAST (__FILL_THIS_FORM_SHORT2 || __FILL_THIS_FORM_SHORT2) && 
__HTML_FONT_LOW_CONTRAST_MINFP meta FORM_LOW_CONTRAST __FORM_LOW_CONTRAST && !__BUGGED_IMG && !__HAS_REPLY_TO && !__DKIM_EXISTS && !__DOS_HAS_LIST_UNSUB && !__MSGID_JAVAMAIL describe FORM_LOW_CONTRAST Fill in a form with hidden text -score FORM_LOW_CONTRAST 3.00 # Limit +score FORM_LOW_CONTRAST 2.500 # Limit tflags FORM_LOW_CONTRAST publish @@ -1387,7 +1418,7 @@ tflags FOUND_YOU publish #meta ADMITS_CANSPAM __ADMITS_CANSPAM && !__VIA_ML #describe ADMITS_CANSPAM Admits to being spam -body __ADMITS_SPAM /\bth(?:e[- ]+above|is)(?:\?+s|[- ]+is)[- ]+(?:intended[- ]+as[- ]+)?an?[- ]+(?:email[- ]+)?advert[i1l]sement\b/i +body __ADMITS_SPAM /\bth(?:e[- ]+above|is)(?:\?+s|[- ]+is)[- ]+(?:intended[- ]+as[- ]+)?an?[- ]+(?:email[- ]+)?[a@]dvert[i1l]sement\b/i meta ADMITS_SPAM __ADMITS_SPAM && !__TO___LOWER && !__MSOE_MID_WRONG_CASE && !__RP_MATCHES_RCVD describe ADMITS_SPAM Admits this is an ad @@ -1438,6 +1469,7 @@ uri __URI_WPCONTENT m,/wp-content/.*\.(?:php|html?)\b,i uri __URI_WPCONTENT_L m,/wp-content/.*\.(?:(?!gif|jpg|png|bmp|ico|eot|pdf)[a-z]{3}|(?!jpeg)[a-z]{4})\b,i uri __URI_WPINCLUDES m,/wp-includes/.*\.(?:php|html?)\b,i uri __URI_WPINCLUDES_L m,/wp-includes/.*\.(?:(?!gif|jpg|png|bmp|ico|eot|pdf)[a-z]{3}|(?!jpeg)[a-z]{4})\b,i +#uri __URI_WP_WHITELIST m,/wp-content/plugins/civicrm/,i meta URI_WP_HACKED (__URI_WPCONTENT || __URI_WPINCLUDES) && !__VIA_ML && !__HAS_ERRORS_TO && !__RCD_RDNS_SMTP && !__THREADED && !ALL_TRUSTED && !__NOT_SPOOFED describe URI_WP_HACKED URI for compromised WordPress site, possible malware score URI_WP_HACKED 3.000 # limit @@ -1457,7 +1489,6 @@ score URI_WP_HACKED_2 2.000 # limit tflags URI_WP_HACKED_2 publish - # subrules migrated from 00_FVGT_File001.cf header __SUBJ_LOWER ALL =~ /subject:\s\S{5}/ @@ -1465,20 +1496,43 @@ header __FROM_LOWER ALL =~ /from:\s\S{5}/ header __TO___LOWER ALL =~ /to:\s\S{5}/ header __DATE_LOWER ALL =~ /date:\s\S{5}/ -header __FH_HAS_XMSMAIL exists:X-MSMail-Priority # duplicates __XPRIO #header __FH_HAS_XPRIORITY 
exists:X-Priority -meta XPRIO __XPRIO && !ALL_TRUSTED && !__PHPMAILER_MUA && !__BUGGED_IMG && !__HAS_ERRORS_TO && !__THREADED && !__VIA_ML && !__RCD_RDNS_MX_MESSY && !__HAS_THREAD_INDEX && !__RP_MATCHES_RCVD && !__HAS_X_REF && !__LCL__ENV_AND_HDR_FROM_MATCH +meta __XPRIO_MINFP __XPRIO && !__CT_ENCRYPTED && !ALL_TRUSTED && !__HAS_ERRORS_TO && !__THREADED && !__RP_MATCHES_RCVD && !__LONGLINE && !__MAIL_LINK && !__RCD_RDNS_SMTP && !__PDF_ATTACH && !__USING_VERP1 && !__HAS_DOMAINKEY_SIG && !__LIST_PARTIAL && !__RCD_RDNS_MX_MESSY && !__XM_VBULLETIN && !__DKIM_EXISTS && !__LCL__ENV_AND_HDR_FROM_MATCH + +ifplugin Mail::SpamAssassin::Plugin::DKIM + ifplugin Mail::SpamAssassin::Plugin::SPF + meta XPRIO __XPRIO_MINFP && !DKIM_SIGNED && !__DKIM_DEPENDABLE && !DKIM_VALID && !DKIM_VALID_AU && !RCVD_IN_DNSWL_NONE && !SPF_PASS + else + meta XPRIO __XPRIO_MINFP && !DKIM_SIGNED && !__DKIM_DEPENDABLE && !DKIM_VALID && !DKIM_VALID_AU && !RCVD_IN_DNSWL_NONE + endif + tflags XPRIO net +else + meta XPRIO __XPRIO_MINFP +endif describe XPRIO Has X-Priority header -score XPRIO 1.000 # limit +score XPRIO 2.250 # limit +tflags XPRIO publish # some no-ham combinations +meta __XPRIO_SHORT_SUBJ __XPRIO && __SUBJ_SHORT +meta XPRIO_SHORT_SUBJ __XPRIO_SHORT_SUBJ && !__HAS_ANY_URI && !__TO_NO_ARROWS_R && !__ENV_AND_HDR_FROM_MATCH && !__VISTA_MSGID +describe XPRIO_SHORT_SUBJ Has X-Priority header + short subject +score XPRIO_SHORT_SUBJ 2.500 # limit +tflags XPRIO_SHORT_SUBJ publish + meta FROM_MISSP_XPRIO __XPRIO && __FROM_MISSPACED describe FROM_MISSP_XPRIO Misspaced FROM + X-Priority score FROM_MISSP_XPRIO 2.500 # limit +meta __STATIC_XPRIO_OLE __XPRIO && __RDNS_STATIC && __HAS_MIMEOLE +meta STATIC_XPRIO_OLE __STATIC_XPRIO_OLE +describe STATIC_XPRIO_OLE Static RDNS + X-Priority + MIMEOLE +score STATIC_XPRIO_OLE 2.000 # limit +tflags STATIC_XPRIO_OLE publish + # Apparent good performance is an artifact of certain corpora's collection mechanism #meta XPRIO_RPATH_NULL (__XPRIO && __BOUNCE_RPATH_NULL) 
&& !__HAS_ERRORS_TO && !__VIA_ML && !ANY_BOUNCE_MESSAGE && !__HAS_ORGANIZATION && !__RCD_RDNS_SMTP_MESSY && !__NOT_SPOOFED #score XPRIO_RPATH_NULL 2.500 # limit @@ -1660,6 +1714,10 @@ score PUMPDUMP_MULTI 3.500 # limit tflags PUMPDUMP_MULTI publish body __STOCK_TIP /\bsto[ck]{2}\s?tip\b/i +meta STOCK_TIP __STOCK_TIP && !__DKIM_EXISTS +describe STOCK_TIP Stock tips +score STOCK_TIP 3.000 # limit +tflags STOCK_TIP publish meta PUMPDUMP_TIP __PD_CNT_1 && __STOCK_TIP describe PUMPDUMP_TIP Pump-and-dump stock tip @@ -1838,7 +1896,7 @@ header __RAND_HEADER ALL =~ /^(?!Accept-Language|Authenticatio tflags __RAND_HEADER multiple, maxhits=4 meta RAND_HEADER_MANY __RAND_HEADER > 3 describe RAND_HEADER_MANY Many random gibberish message headers -score RAND_HEADER_MANY 3.500 # limit +score RAND_HEADER_MANY 3.000 # limit tflags RAND_HEADER_MANY publish @@ -1859,7 +1917,7 @@ describe DUP_SUSP_HDR Duplicate suspicious message headers score DUP_SUSP_HDR 2.500 # limit # seen 10/2014: "https://www.google.com/url?q=https://copy.com/ApbFn2848pQm/ShippingInvoice_6974.PDF.scr?download=1&sa=D&sntz=1&usg=AFQjCNGhvWhljnujQlP85tA6YUsddfuJow" -uri __GOOG_MALWARE_DNLD m;^https?://[^/]*\.google\.com/[^?]*url\?.*[\?&]download=1;i +uri __GOOG_MALWARE_DNLD m;^https?://[^/]*\.google\.com/[^?]*url\?.*[\?&/]download;i meta GOOG_MALWARE_DNLD __GOOG_MALWARE_DNLD describe GOOG_MALWARE_DNLD File download via Google - Malware? 
score GOOG_MALWARE_DNLD 5.000 # limit @@ -1874,7 +1932,7 @@ body SOLICIT_BIZ /\bbusiness solicitation messag/i body __SPELLED_OUT_NUM /\b(?:(?:one|two|three|four|five|six|seven|eight|nine|zero)[\s_-]?){4,}/i meta SPELLED_OUT_NUMBER __SPELLED_OUT_NUM && !__DKIM_EXISTS describe SPELLED_OUT_NUMBER Spelled out a number (one two three) -score SPELLED_OUT_NUMBER 3.250 # limit +score SPELLED_OUT_NUMBER 3.000 # limit body __NUM_SPCD_LTRS /\d{4}\s(?:[a-z]\s){5}/i @@ -1901,17 +1959,37 @@ meta TEQF_USR_IMAGE __TO_EQ_FROM_USR_NN_MINFP && __ANY_IMAGE describe TEQF_USR_IMAGE To and from user nearly same + image tflags TEQF_USR_IMAGE publish +meta TEQF_USR_POLITE __TO_EQ_FROM_USR_NN && __FRAUD_IRT +describe TEQF_USR_POLITE To and from user nearly same + polite greeting +score TEQF_USR_POLITE 2.000 # limit + meta __MSGID_HEX_MALF __MSGID_NOFQDN2 && __MSGID_OK_HEX meta __URI_ONLY_MSGID_MALF __BODY_URI_ONLY && __MSGID_NOFQDN2 -meta URI_ONLY_MSGID_MALF __URI_ONLY_MSGID_MALF && !__RP_MATCHES_RCVD && !__URI_MAILTO && !__NOT_SPOOFED && !__DKIM_EXISTS && !__MSGID_JAVAMAIL && !__HAS_REPLY_TO +#ifplugin Mail::SpamAssassin::Plugin::DNSEval + meta URI_ONLY_MSGID_MALF __URI_ONLY_MSGID_MALF && !__RP_MATCHES_RCVD && !__URI_MAILTO && !__NOT_SPOOFED && !__DKIM_EXISTS && !__MSGID_JAVAMAIL && !__HAS_REPLY_TO && !RCVD_IN_DNSWL_LOW + tflags URI_ONLY_MSGID_MALF net +#else + meta URI_ONLY_MSGID_MALF __URI_ONLY_MSGID_MALF && !__RP_MATCHES_RCVD && !__URI_MAILTO && !__NOT_SPOOFED && !__DKIM_EXISTS && !__MSGID_JAVAMAIL && !__HAS_REPLY_TO +#endif describe URI_ONLY_MSGID_MALF URI only + malformed message ID +score URI_ONLY_MSGID_MALF 2.000 # limit tflags URI_ONLY_MSGID_MALF publish -meta GOOG_REDIR_SHORT __GOOG_REDIR && __KAM_BODY_LENGTH_LT_512 +# These may be a bit risky, the masscheck ham corpus may not +# reflect how often these are legit in Real Life... 
+meta GOOG_REDIR_SHORT __GOOG_REDIR && __LCL__KAM_BODY_LENGTH_LT_512 describe GOOG_REDIR_SHORT Google redirect to obscure spamvertised website + short message tflags GOOG_REDIR_SHORT publish +meta GOOG_REDIR_NORDNS __GOOG_REDIR && RDNS_NONE +describe GOOG_REDIR_NORDNS Google redirect to obscure spamvertised website + no rDNS + +meta GOOG_REDIR_HTML_ONLY (__GOOG_REDIR && MIME_HTML_ONLY) && !RDNS_NONE && !__LCL__KAM_BODY_LENGTH_LT_512 +describe GOOG_REDIR_HTML_ONLY Google redirect to obscure spamvertised website + HTML only +score GOOG_REDIR_HTML_ONLY 2.000 # limit + + # low S/O, apparently lots of invisible ham... rawbody __STY_INVIS /\bstyle\s*=(?:3d)?\s*"\s*(?:visibility\s*:\s*hidden\s*;|display\s*:\s*none\s*;|background\s*:)/i @@ -1920,6 +1998,8 @@ meta __STY_INVIS_MANY __STY_INVIS > 5 #meta HTML_TEXT_INVISIBLE __STY_INVIS_MANY #describe HTML_TEXT_INVISIBLE Hidden text #score HTML_TEXT_INVISIBLE 2.000 # limit +# try it on span tags only... +rawbody __SPAN_INVIS /]{0,80}style\s*=(?:3d)?\s*"\s*(?:visibility\s*:\s*hidden\s*;|display\s*:\s*none\s*;|background\s*:)/i # Adapted from SARE rules __SARE_HTML_SINGLET* rawbody __HTML_SINGLET />\s*(?:[a-z"]|&\#(?:\d+|x[0-9a-f]+);)\s* 2 +#body __PUNCT_ODD_SPACING /[a-z]{3}\s+[.,][a-z]{3}/ +#tflags __PUNCT_ODD_SPACING multiple, maxhits=3 +#meta __PUNCT_ODD_SPACING_MANY __PUNCT_ODD_SPACING > 2 + +# poor S/O - how is this in ham? 
+#header XMAILER_MANY ALL =~ /\nX-Mailer:(?:[^\n]+\n)+X-Mailer:/ism +#describe XMAILER_MANY Has multiple X-Mailer: headers + +body __RAW_TOKEN_BODY /\#(?:(?:First|Last)Name|Email)\#/i +#header __RAW_TOKEN_HDR ALL =~ /\$(?:rand[^$]{0,10})\$/i +#tflags __RAW_TOKEN multiple maxhits=3 +#meta RAW_TOKENS __RAW_TOKEN > 2 +#describe RAW_TOKENS Raw mail merge tokens in body + +header __REPTO_CHN_FREEM Reply-To =~ /\@(?:sina|aliyun)\.com/i + +meta __SPOOFED_FREEM_REPTO __SPOOFED_FREEMAIL && FREEMAIL_REPLYTO + +meta SPOOFED_FREEM_REPTO_CHN (__SPOOFED_FREEM_REPTO || FORGED_YAHOO_RCVD) && __REPTO_CHN_FREEM +describe SPOOFED_FREEM_REPTO_CHN Forged freemail sender with Chinese freemail reply-to +score SPOOFED_FREEM_REPTO_CHN 3.500 +tflags SPOOFED_FREEM_REPTO_CHN publish + +meta SPOOFED_FREEM_REPTO __SPOOFED_FREEM_REPTO && !__THREADED +describe SPOOFED_FREEM_REPTO Forged freemail sender with freemail reply-to +score SPOOFED_FREEM_REPTO 2.500 +tflags SPOOFED_FREEM_REPTO publish + + +#header __VERY_LONG_REPTO Reply-To =~ /[^<\s\@]{25,}\@/ +#meta __VERY_LONG_REPTO_SHORT_MSG __VERY_LONG_REPTO && __HTML_LENGTH_0000_1024 +#meta VERY_LONG_REPTO_SHORT_MSG __VERY_LONG_REPTO_SHORT_MSG && !__VIA_ML && !__TO_EQ_FROM_DOM && !__THREAD_INDEX_GOOD +#describe VERY_LONG_REPTO_SHORT_MSG Very long Reply-To username + short message +#score VERY_LONG_REPTO_SHORT_MSG 2.500 # limit +#tflags VERY_LONG_REPTO_SHORT_MSG publish +# +#ifplugin Mail::SpamAssassin::Plugin::FreeMail +# meta __VERY_LONG_FREEM_REPTO __VERY_LONG_REPTO && FREEMAIL_REPLYTO +# meta VERY_LONG_FREEM_REPTO __VERY_LONG_FREEM_REPTO +# describe VERY_LONG_FREEM_REPTO Very long freemail Reply-To username +# score VERY_LONG_FREEM_REPTO 2.500 # limit +# tflags VERY_LONG_FREEM_REPTO publish +#endif + +# for ; Mon, 2 Nov 2015 14:27:08 GMT +# (envelope-from fastnet.co.uk.12056010.steve.stewart@vmta27.topreasonstovisit.com) +# S/O low, seems to be common in legit mailing lists +# Maybe in meta with "not a mailing list" rules? 
+#header __RECIP_IN_ENV_FM_01 Received =~ /for\s+<([^\@]+)\@([^>]+)>.*envelope-from\s+\2\.\d+\.\1\@/i +#header __RECIP_IN_ENV_FM_02 Received =~ /for\s+<([^\@]+)\@([^>]+)>.*envelope-from\s+[^@]*\2[^@]*\@/i + + +uri URI_MALWARE_CWALL /\/abuse_report\.php\?(?!username=)[^&\s.]{1,100}\./i +describe URI_MALWARE_CWALL Potential CryptoWall malware URL + + +meta __LIST_PARTIAL_SHORT_MSG __HTML_LENGTH_0000_1024 && __LIST_PARTIAL +meta LIST_PARTIAL_SHORT_MSG __LIST_PARTIAL_SHORT_MSG && !__DKIM_EXISTS +describe LIST_PARTIAL_SHORT_MSG Incomplete mailing list headers + short message +score LIST_PARTIAL_SHORT_MSG 2.500 # limit + +# duplicates __HAS_MSMAIL_PRI +#header __FH_HAS_XMSMAIL exists:X-MSMail-Priority + +meta __BOGUS_MSM_HDRS __HAS_MSMAIL_PRI && __MSOE_MID_WRONG_CASE && __HDR_ORDER_FTSDMCXXXX +meta BOGUS_MSM_HDRS __BOGUS_MSM_HDRS +describe BOGUS_MSM_HDRS Apparently bogus Microsoft email headers +score BOGUS_MSM_HDRS 3.000 # limit +tflags BOGUS_MSM_HDRS publish + +#meta __BOGUS_MSM_PRIO __HAS_MSMAIL_PRI && __HDR_ORDER_FTSDMCXXXX +#meta __BOGUS_MSM_PRIO_MINFP __BOGUS_MSM_PRIO && !__BOGUS_MSM_HDRS && !__MSGID_NOFQDN2 && !__ANY_OUTLOOK_MUA && !__RCD_RDNS_MAIL_MESSY + +meta __MSM_PRIO_REPTO __HAS_MSMAIL_PRI && __REPLYTO_EXISTS && __SUBJ_SHORT +meta MSM_PRIO_REPTO __MSM_PRIO_REPTO && !__ENV_AND_HDR_FROM_MATCH +describe MSM_PRIO_REPTO MSMail priority header + Reply-to + short subject +score MSM_PRIO_REPTO 2.500 # limit +tflags MSM_PRIO_REPTO publish + +header __XM_YAMAIL X-Mailer =~ /^Yamail/ + + +# __GATED_THROUGH_RCVD_REMOVER includes messages with no Received headers *at all*. +# Don't consider those, only consider the ones where *some* Received headers may have been removed +meta __RCVD_RMV_PARTIAL __GATED_THROUGH_RCVD_REMOVER && __HAS_RCVD + +# Compare __GATED_THROUGH_RCVD_REMOVER and "via ezmlm" +header __ML_EZMLM Mailing-List =~ /\bezmlm\b/ + + +# easy for spammers to forge a signed message and still have it displayed to the recipient? 
+#header KHOP_ENCRYPTED_CONTENT Content-Type =~ /^multipart\/(?:x-)?(?:pgp-)?encrypted|application\/(?:x-)?pkcs7-mime/ +header __CT_ENCRYPTED Content-Type =~ /^multipart\/(?:x-)?(?:pgp-)?encrypted|application\/(?:x-)?pkcs7-mime/ +meta ENCRYPTED_MESSAGE __CT_ENCRYPTED +describe ENCRYPTED_MESSAGE Message is encrypted, not likely to be spam +score ENCRYPTED_MESSAGE -1.000 +tflags ENCRYPTED_MESSAGE nice,publish + + +#body __PHONE_GIBBERISH_01 /(?:\b\d\d\d-\d\d\d-\d\d\d\d\s+[a-z][^\d\s:.]+\s+){15}/ + +header __HAS_GMX_BULK exists:X-Gmx-Bulk + +ifplugin Mail::SpamAssassin::Plugin::HTMLEval + body __HTML_TAG_BALANCE_CENTER eval:html_tag_balance('center', '!= 0') + meta HTML_TAG_BALANCE_CENTER __HTML_TAG_BALANCE_CENTER && !__RCD_RDNS_MAIL_MESSY && !__RCD_RDNS_SMTP_MESSY + describe HTML_TAG_BALANCE_CENTER Malformatted HTML +endif + + +# more random garbage message headers 01/2016 +header __HDR_CASE_REVERSED ALL =~ /^(?!DomainKey)[^-:\s]*[a-z][A-Z]/m +tflags __HDR_CASE_REVERSED multiple maxhits=4 +meta __HDR_CASE_REV_MANY (__HDR_CASE_REVERSED > 3) + +meta HDR_CASE_REV_MANY __HDR_CASE_REV_MANY +describe HDR_CASE_REV_MANY Multiple malformed (possibly random gibberish) message headers +score HDR_CASE_REV_MANY 2.000 # limit + +meta HDR_CASE_REV_ENC __HDR_CASE_REVERSED && (__FROM_ENCODED_B64 || __TVD_SPACE_ENCODED ) +describe HDR_CASE_REV_ENC Malformed (possibly random gibberish) message header + suspicious encoding +score HDR_CASE_REV_ENC 2.000 # limit + +meta HDR_CASE_REV_HELO_IP __HDR_CASE_REVERSED && __HELO_MISC_IP +describe HDR_CASE_REV_HELO_IP Malformed (possibly random gibberish) message header + IP in HELO +score HDR_CASE_REV_HELO_IP 2.000 # limit + + + +header __HAS_CAMPAIGN exists:X-Campaign +header __HAS_CAMPAIGNID exists:X-Campaignid +header __HAS_CID exists:X-CID +header __HAS_XM_LID exists:X-Mailer-LID +header __HAS_XM_RECPTID exists:X-Mailer-RecptId +header __HAS_XM_SID exists:X-Mailer-SID +header __HAS_XM_SENTBY exists:X-Mailer-Sent-By +header __HAS_DOMAINKEY_SIG 
exists:DomainKey-Signature +header __HAS_PHP_SCRIPT exists:X-PHP-Script +header __HAS_PHP_ORIG_SCRIPT exists:X-PHP-Originating-Script + +header __FROM_WORDY From:addr =~ /^(?:(?:[A-Z][A-Za-z]+|or|&)\.)+[A-Z][A-Za-z]+\@/ +#header __FROM_WORDY From:addr =~ /^(?:(?:[A-Z][A-Za-z]+|or|&)\.)+[A-Z][A-Za-z]+(?]*[?">]/i + +meta __MIMEOLE_DIRECT_TO_MX __HAS_MIMEOLE && __DOS_DIRECT_TO_MX +meta MIMEOLE_DIRECT_TO_MX __MIMEOLE_DIRECT_TO_MX && !__ANY_IMAGE_ATTACH +describe MIMEOLE_DIRECT_TO_MX MIMEOLE + direct-to-MX +score MIMEOLE_DIRECT_TO_MX 2.000 # limit + + +# suggested 9/2016 by ChipM in personal email +# would be a LOT nicer if rules could use other rules' captures +# terrible S/O +#full __FROM_FULLN_URL m;^From:\s+"?([a-z]+)\s([a-z]+)\b.*?https?://[^/]+/\1[_.]\2\b;ism +#meta FROM_FULLN_URL __FROM_FULLN_URL && !__THREADED +#describe FROM_FULLN_URL From address full name is in body URL - possible phishing +#score FROM_FULLN_URL 2.000 # limit -header XMAILER_MANY ALL =~ /\nX-Mailer:(?:[^\n]+\n)+X-Mailer:/ism -describe XMAILER_MANY Has multiple X-Mailer: headers +# warning: __SUBJECT_EMPTY true if header entirely missing... 
+header __SUBJECT_EMPTY Subject:raw =~ /^$/ +meta __SUBJECT_PRESENT_EMPTY __HAS_SUBJECT && __SUBJECT_EMPTY -body __RAW_TOKEN /\#(?:(?:First|Last)Name|Email)\#/i -tflags __RAW_TOKEN multiple maxhits=3 -meta RAW_TOKENS __RAW_TOKEN > 2 -describe RAW_TOKENS Raw mail merge tokens in body +body __BAYES_POISON_NUMS_01 /\s([0-9]{6,})\s(?:.{15,}?\s\1\s){10}/ diff --git rulesrc/sandbox/jhardin/20_shared_subrules.cf rulesrc/sandbox/jhardin/20_shared_subrules.cf index 33d32d7bd..75b66ede3 100644 --- rulesrc/sandbox/jhardin/20_shared_subrules.cf +++ rulesrc/sandbox/jhardin/20_shared_subrules.cf @@ -5,3 +5,11 @@ # originally from khopesh/20_khop_experimental.cf rawbody __BUGGED_IMG m{]{0,100}\ssrc=.?https?://[^>]{6,80}(?:\?[^>]{8}|[^a-z](?![a-f]{3}|20\d\d[01]\d[0-3]\d)[0-9a-f]{8})}i + +# originally from khopesh/20_s25r.cf +# Sanity check: how much freemail lacks spf or dkim? +# JHardin: convert to subrule and scored meta +meta __SPOOFED_FREEMAIL !__NOT_SPOOFED && FREEMAIL_FROM +meta SPOOFED_FREEMAIL __SPOOFED_FREEMAIL && !__FS_SUBJ_RE + + diff --git rulesrc/sandbox/jhardin/20_tbird_image_spam.cf rulesrc/sandbox/jhardin/20_tbird_image_spam.cf index 680f0db23..e1587b207 100644 --- rulesrc/sandbox/jhardin/20_tbird_image_spam.cf +++ rulesrc/sandbox/jhardin/20_tbird_image_spam.cf @@ -29,58 +29,58 @@ describe FORGED_TBIRD_IMG_ARROW Likely forged Thunderbird image spam meta __TO_NO_BRKTS_HTML_IMG __TO_NO_ARROWS_R && !__TO_UNDISCLOSED && HTML_MESSAGE && __ONE_IMG meta TO_NO_BRKTS_HTML_IMG __TO_NO_BRKTS_HTML_IMG && !__FM_TO_ALL_NUMS && !__FROM_FULL_NAME && !__HAS_THREAD_INDEX && !__DKIM_EXISTS && !__HAS_SENDER && !__THREADED && !__LONGLINE -describe TO_NO_BRKTS_HTML_IMG To: misformatted and HTML and one image +describe TO_NO_BRKTS_HTML_IMG To: lacks brackets and HTML and one image score TO_NO_BRKTS_HTML_IMG 2.000 # limit tflags TO_NO_BRKTS_HTML_IMG publish meta __TO_NO_BRKTS_HTML_ONLY __TO_NO_ARROWS_R && !__TO_UNDISCLOSED && MIME_HTML_ONLY meta TO_NO_BRKTS_HTML_ONLY 
__TO_NO_BRKTS_HTML_ONLY && !RDNS_NONE && !__MIME_QP && !__MSGID_JAVAMAIL && !__CTYPE_CHARSET_QUOTED && !__SUBJECT_ENCODED_B64 && !__VIA_ML && !__MSGID_BEFORE_RECEIVED && !__MIME_BASE64 && !__RCD_RDNS_MAIL_MESSY && !__COMMENT_EXISTS && !LOTS_OF_MONEY && !__TAG_EXISTS_CENTER && !__UPPERCASE_URI && !__UNSUB_LINK && !__RCD_RDNS_MX_MESSY && !__DKIM_EXISTS && !__BUGGED_IMG && !__FM_TO_ALL_NUMS && !__URI_12LTRDOM && !__RDNS_NO_SUBDOM && !__HDRS_LCASE && !__LCL__ENV_AND_HDR_FROM_MATCH score TO_NO_BRKTS_HTML_ONLY 2.00 # limit -describe TO_NO_BRKTS_HTML_ONLY To: misformatted and HTML only +describe TO_NO_BRKTS_HTML_ONLY To: lacks brackets and HTML only tflags TO_NO_BRKTS_HTML_ONLY publish meta __TO_NO_BRKTS_DYNIP __TO_NO_ARROWS_R && !__TO_UNDISCLOSED && RDNS_DYNAMIC meta TO_NO_BRKTS_DYNIP __TO_NO_BRKTS_DYNIP && !__NAME_IS_EMAIL && !__MSGID_OK_HEX && !__UNSUB_LINK && !__THREADED && !__RCD_RDNS_MX_MESSY && !__COMMENT_EXISTS && !__MUA_TBIRD && !__CD && !__ML1 && !__RP_MATCHES_RCVD && !__SUBSCRIPTION_INFO && !__HAS_THREAD_INDEX && !__IS_EXCH -describe TO_NO_BRKTS_DYNIP To: misformatted and dynamic rDNS +describe TO_NO_BRKTS_DYNIP To: lacks brackets and dynamic rDNS #tflags TO_NO_BRKTS_DYNIP publish #meta __TO_NO_BRKTS_NORDNS __TO_NO_ARROWS_R && !__TO_UNDISCLOSED && RDNS_NONE #meta TO_NO_BRKTS_NORDNS __TO_NO_BRKTS_NORDNS && !ALL_TRUSTED && !__NOT_SPOOFED #score TO_NO_BRKTS_NORDNS 0.75 # limit, rDNS can fail -#describe TO_NO_BRKTS_NORDNS To: misformatted and no rDNS +#describe TO_NO_BRKTS_NORDNS To: lacks brackets and no rDNS meta __TO_NO_BRKTS_NORDNS_HTML __TO_NO_BRKTS_HTML_ONLY && RDNS_NONE meta TO_NO_BRKTS_NORDNS_HTML __TO_NO_BRKTS_NORDNS_HTML && !ALL_TRUSTED && !__MSGID_JAVAMAIL && !__MSGID_BEFORE_RECEIVED && !__VIA_ML && !__UA_MUTT && !__COMMENT_EXISTS && !__HTML_LENGTH_384 && !__MIME_BASE64 && !__UPPERCASE_URI && !__TO___LOWER && !__TAG_EXISTS_CENTER && !__LONGLINE && !__DKIM_EXISTS score TO_NO_BRKTS_NORDNS_HTML 2.00 # limit -describe TO_NO_BRKTS_NORDNS_HTML To: misformatted 
and no rDNS and HTML only +describe TO_NO_BRKTS_NORDNS_HTML To: lacks brackets and no rDNS and HTML only tflags TO_NO_BRKTS_NORDNS_HTML publish meta __TO_NO_BRKTS_MSFT __TO_NO_ARROWS_R && !__TO_UNDISCLOSED && (__ANY_OUTLOOK_MUA || __MIMEOLE_MS) meta TO_NO_BRKTS_MSFT __TO_NO_BRKTS_MSFT && !__VIA_ML && !__LYRIS_EZLM_REMAILER && !__THREAD_INDEX_GOOD && !__IS_EXCH && !__UNSUB_LINK && !__NOT_SPOOFED && !__DOS_HAS_LIST_UNSUB && !__NAME_EQ_EMAIL && !__SUBJECT_ENCODED_QP && !__THREADED && !__HAS_THREAD_INDEX && !__HAS_X_REF && !__HAS_IN_REPLY_TO && !__FROM_ENCODED_QP && !__RP_MATCHES_RCVD -describe TO_NO_BRKTS_MSFT To: misformatted and supposed Microsoft tool +describe TO_NO_BRKTS_MSFT To: lacks brackets and supposed Microsoft tool score TO_NO_BRKTS_MSFT 2.50 # limit meta __TO_NO_BRKTS_PCNT __TO_NO_ARROWS_R && __FB_NUM_PERCNT meta TO_NO_BRKTS_PCNT __TO_NO_BRKTS_PCNT && !__SUBJECT_ENCODED_B64 && !__DOS_HAS_LIST_UNSUB && !__VIA_ML && !__ISO_2022_JP_DELIM && !__IMS_MSGID && !__THREAD_INDEX_GOOD && !__RCD_RDNS_MX_MESSY && !__UNSUB_LINK && !__LONGLINE && !URI_HEX && !__RP_MATCHES_RCVD && !__MAIL_LINK && !__BUGGED_IMG && !__MIME_QP && !__COMMENT_EXISTS && !__TAG_EXISTS_STYLE && !__LCL__ENV_AND_HDR_FROM_MATCH && !__HAS_X_MAILER && !__HTML_LINK_IMAGE && !__SENDER_BOT && !__DKIM_EXISTS && !__KHOP_NO_FULL_NAME && !__THREADED -describe TO_NO_BRKTS_PCNT To: misformatted + percentage +describe TO_NO_BRKTS_PCNT To: lacks brackets + percentage score TO_NO_BRKTS_PCNT 2.50 # limit tflags TO_NO_BRKTS_PCNT publish #meta __TO_NO_BRKTS_DIRECT __TO_NO_ARROWS_R && __DOS_DIRECT_TO_MX #meta TO_NO_BRKTS_DIRECT __TO_NO_BRKTS_DIRECT && !__IS_EXCH && !__THREAD_INDEX_GOOD && !__COMMENT_EXISTS && !__RCD_RDNS_MTA_MESSY && !__TVD_SPACE_RATIO && !__THREADED && !__FB_DO_NOT_REPLY && !__VBOUNCE_MAILSWEEP3 && !__DEAL && !__RCD_RDNS_MAIL_MESSY && !__UNSUB_LINK && !__RP_MATCHES_RCVD && !__DKIM_EXISTS && !__TAG_EXISTS_CENTER -#describe TO_NO_BRKTS_DIRECT To: misformatted and direct-to-MX +#describe 
TO_NO_BRKTS_DIRECT To: lacks brackets and direct-to-MX #tflags TO_NO_BRKTS_DIRECT publish #meta __TO_NO_BRKTS_NOTLIST __TO_NO_ARROWS_R && !__VIA_ML #meta TO_NO_BRKTS_NOTLIST __TO_NO_BRKTS_NOTLIST && !__UNUSABLE_MSGID && !__THREADED && !__SUBJ_RE && !__RCD_RDNS_MAIL_MESSY && !__HAS_MIMEOLE && !__THREAD_INDEX_GOOD && !__IMS_MSGID && !__RCD_RDNS_MTA_MESSY && !__BOUNCE_RPATH_NULL && !__BOUNCE_STAT_FAIL && !__BOUNCE_CTYPE && !ALL_TRUSTED && !__FB_DO_NOT_REPLY && !__RPATH_12LTRDOM && !__MIME_BASE64 && !__UPPERCASE_URI && !__TO___LOWER && !__BUGGED_IMG && !__JM_REACTOR_DATE && !__RP_MATCHES_RCVD && !__X_CRON_ENV && !NO_RELAYS -#describe TO_NO_BRKTS_NOTLIST To: misformatted and not a mailing list +#describe TO_NO_BRKTS_NOTLIST To: lacks brackets and not a mailing list ifplugin Mail::SpamAssassin::Plugin::FreeMail # meta TO_NO_BRKTS_FREEMAIL __TO_NO_ARROWS_R && !__TO_UNDISCLOSED && (FREEMAIL_FROM || FREEMAIL_REPLYTO) meta __TO_NO_BRKTS_FREEMAIL __TO_NO_ARROWS_R && (FREEMAIL_FROM || FREEMAIL_REPLYTO) meta TO_NO_BRKTS_FREEMAIL __TO_NO_BRKTS_FREEMAIL && !__TO_EQ_FROM_DOM - describe TO_NO_BRKTS_FREEMAIL To: misformatted and free email service + describe TO_NO_BRKTS_FREEMAIL To: lacks brackets and free email service #score TO_NO_BRKTS_FREEMAIL 0.20 tflags TO_NO_BRKTS_FREEMAIL nopublish else @@ -91,7 +91,7 @@ meta __TO_NO_BRKTS_FROM_RUNON __TO_NO_ARROWS_R && !__TO_UNDISCLOSED && __ meta __TO_NO_BRKTS_FROM_MSSP __TO_NO_ARROWS_R && !__TO_UNDISCLOSED && __FROM_MISSPACED meta TO_NO_BRKTS_FROM_MSSP __TO_NO_BRKTS_FROM_RUNON && !__RCD_RDNS_MTA_MESSY && !__CTYPE_MULTIPART_ALT && !__REPTO_QUOTE && !__MIME_QP && !__TO___LOWER && !__BUGGED_IMG && !__SUBJECT_ENCODED_QP && !__VIA_ML && !__FR_SPACING_8 && !__TAG_EXISTS_CENTER && !__RCVD_ZIXMAIL && !__RP_MATCHES_RCVD && !__HAS_SENDER score TO_NO_BRKTS_FROM_MSSP 2.50 # max -describe TO_NO_BRKTS_FROM_MSSP Multiple formatting errors +describe TO_NO_BRKTS_FROM_MSSP Multiple header formatting problems # The boundary *does* FP on legit mail. 
However, all of KB's recent samples diff --git rulesrc/sandbox/jhardin/20_uri_obfu_ws.cf rulesrc/sandbox/jhardin/20_uri_obfu_ws.cf index f6160a4c0..13af0ea96 100644 --- rulesrc/sandbox/jhardin/20_uri_obfu_ws.cf +++ rulesrc/sandbox/jhardin/20_uri_obfu_ws.cf @@ -10,9 +10,13 @@ ifplugin Mail::SpamAssassin::Plugin::ReplaceTags -body URI_OBFU_WWW /(?+[^[:alnum:]]{1,3})?[[:alnum:]][-\w]{1,20}[[:alnum:]][^[:alnum:]]{1,3}(?:+[^[:alnum:]]{1,3})?(?:c\s?o\s?m|n\s?e\s?t|o\s?r\s?g|b\s?i\s?z|i\s?n\s?f\s?o)_*\b/i -describe URI_OBFU_WWW Obfuscated URI -replace_rules URI_OBFU_WWW +# Disabled 10/2016 - abysmal S/O, it's ignoring the score limit, and reported FPs in bug#7365 that I can't repro +# trying minor tuning change before we give up, will disable again if that doesn't help +# Disabled again 11/2016, still ignoring the score limit, abysmal S/O +#body URI_OBFU_WWW /(?+[^[:alnum:]]{1,3})?[[:alnum:]][-\w]{1,20}[[:alnum:]][^[:alnum:]]{1,3}(?:+[^[:alnum:]]{1,3})?(?:c\s?o\s?m|n\s?e\s?t|o\s?r\s?g|b\s?i\s?z|i\s?n\s?f\s?o)_*\b/i +#describe URI_OBFU_WWW Obfuscated URI +#replace_rules URI_OBFU_WWW +#score URI_OBFU_WWW 2.000 # limit endif diff --git rulesrc/sandbox/jquinn/20_misc.cf rulesrc/sandbox/jquinn/20_misc.cf index 18c2d58ff..3dbeb61aa 100644 --- rulesrc/sandbox/jquinn/20_misc.cf +++ rulesrc/sandbox/jquinn/20_misc.cf @@ -26,3 +26,7 @@ body EXCUSE_24 /you(?:'ve|'re| have| are)? 
receiv(?:e|ed|ing) this (?:advertisement|offer|special|recurring|paid).{0,16}\b(?:by either|because)/i describe EXCUSE_24 Claims you wanted this ad +header __USING_VERP1 Return-Path =~ /[+-].*=/ + +meta USING_VERP (__USING_VERP1 && !__HAS_LIST_ID) +describe USING_VERP Message uses VERP diff --git rulesrc/sandbox/khopesh/20_s25r.cf rulesrc/sandbox/khopesh/20_s25r.cf index 3b970ab47..1bd531950 100644 --- rulesrc/sandbox/khopesh/20_s25r.cf +++ rulesrc/sandbox/khopesh/20_s25r.cf @@ -59,10 +59,6 @@ describe KHOP_BOTNET_UNCLEAN Relay looks like a dynamic address tflags KHOP_BOTNET_UNCLEAN nopublish - -# Sanity check: how much freemail lacks spf or dkim? -meta SPOOFED_FREEMAIL !__NOT_SPOOFED && FREEMAIL_FROM - - # see if we can further reduce the FPs w/out impacting the spam hits too hard header __RDNS_HEX9 X-Spam-Relays-External =~ /^[^\]]+ rdns=[^ .]*\d(?![0-9a-f]*[a-f]{3})[0-9a-f]{8}/ + diff --git rulesrc/sandbox/kmcgrail/20_rules_to_sandbox.cf rulesrc/sandbox/kmcgrail/20_rules_to_sandbox.cf new file mode 100644 index 000000000..e899f6f3f --- /dev/null +++ rulesrc/sandbox/kmcgrail/20_rules_to_sandbox.cf @@ -0,0 +1,60 @@ +# SpamAssassin rules file: kam sandbox +# +# Please don't modify this file as your changes will be overwritten with +# the next update. Use @@LOCAL_RULES_DIR@@/local.cf instead. +# See 'perldoc Mail::SpamAssassin::Conf' for details. +# +# <@LICENSE> +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to you under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# +########################################################################### + +#See bug 7192 for MILLION_USD, NA_DOLLARS & US_DOLLARS where these rules are being moved to sandbox for ruleqa testing and auto promotion. + +body MILLION_USD /Million\b.{0,40}\b(?:United States? Dollars?|USD)/i +describe MILLION_USD Talks about millions of dollars +#OLD SCORE - SETTING A CEILING OF 2 - score MILLION_USD 3.799 2.477 3.221 3.247 +score MILLION_USD 2 +lang de describe MILLION_USD Erwhnt Millonen von Dollar +lang fr describe MILLION_USD Phrase cl d'escroquerie nigrienne (millions of dollars) +lang nl describe MILLION_USD Heeft het over miljoenen dollars +lang pl describe MILLION_USD O milionach dolarw +lang pt_BR describe MILLION_USD Fala sobre milhes de dlares + +body NA_DOLLARS /\b(?:\d{1,3})?Million\b.{0,40}\b(?:Canadian Dollar?s?|US\$|U\.? ?S\.? 
Dollar)/i +describe NA_DOLLARS Talks about a million North American dollars +#OLD SCORE - SETTING A CEILING OF 1.5 score NA_DOLLARS 3.599 +score NA_DOLLARS 1.5 +lang de describe NA_DOLLARS Handelt von einer Million Dollar aus den US oder Kanada +lang fr describe NA_DOLLARS Parle d'un million de dollars "nord-amricains" +lang nl describe NA_DOLLARS Praat over een miljoen Noord-Amerikaanse dollars +lang pl describe NA_DOLLARS O milionie Pnocno Amerykaskich dolarw +lang pt_BR describe NA_DOLLARS Fala sobre milhes de dlares norte americanos ou canadenses + + +body US_DOLLARS_3 /(?:\$|usd).?\d{1,3}[,.]\d{3}[,.]\d{3}(?:[,.]\d\d)?/i +describe US_DOLLARS_3 Mentions millions of $ ($NN,NNN,NNN.NN) +#OLD SCORE - SETTING A CEILING OF 2 - score US_DOLLARS_3 2.599 2.523 1.780 1.754 +score US_DOLLARS_3 2.0 +lang de describe US_DOLLARS_3 Erwhnt Millonen von Dollar +lang fr describe US_DOLLARS_3 Escroq. nigrienne, version modifie, phrase cl ($NN,NNN,NNN.NN) +lang nl describe US_DOLLARS_3 Vermeldt miljoenen $ ($NN,NNN,NNN.NN) +lang pl describe US_DOLLARS_3 Wspomina miliony $ ($NN,NNN,NNN.NN) +lang pt_BR describe US_DOLLARS_3 Contm $($NN,NNN,NNN.NN) + + +#EOF diff --git rulesrc/sandbox/maddoc/99_doc_test.cf rulesrc/sandbox/maddoc/99_doc_test.cf index bea844247..0eacc3446 100644 --- rulesrc/sandbox/maddoc/99_doc_test.cf +++ rulesrc/sandbox/maddoc/99_doc_test.cf @@ -100,7 +100,7 @@ meta FSL_STACKED_TEXT (__TWO_WORD_LINES > 10) # SMF: FP avoidance # JHardin: don't hit 127.x.x.x (loopback) addresses header __FSL_HELO_BARE_IP_1 X-Spam-Relays-External =~ /^[^\]]+ helo=(?!127)\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3} /i -meta FSL_HELO_BARE_IP_1 __FSL_HELO_BARE_IP_1 && !FSL_HELO_BARE_IP_2 +meta FSL_HELO_BARE_IP_1 __FSL_HELO_BARE_IP_1 # score FSL_HELO_BARE_IP_1 0.001 @@ -109,8 +109,8 @@ meta FSL_HELO_BARE_IP_1 __FSL_HELO_BARE_IP_1 && !FSL_HELO_BARE_IP_2 # score limit due to partial overlap with RCVD_NUMERIC_HELO # JHardin: don't hit 127.x.x.x (loopback) addresses header __FSL_HELO_BARE_IP_2 
X-Spam-Relays-External =~ /helo=(?!127)\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3} /i -meta FSL_HELO_BARE_IP_2 __FSL_HELO_BARE_IP_2 && !__VIA_ML && !__HAS_ERRORS_TO -score FSL_HELO_BARE_IP_2 2.000 +meta FSL_HELO_BARE_IP_2 __FSL_HELO_BARE_IP_2 && !ALL_TRUSTED && !FSL_HELO_BARE_IP_1 && !__VIA_ML && !__HAS_ERRORS_TO +score FSL_HELO_BARE_IP_2 1.500 header FSL_HELO_NON_FQDN_1 X-Spam-Relays-External =~ /^[^\]]+ helo=[a-zA-Z0-9-_]+ /i # score FSL_HELO_NON_FQDN_1 0.001 @@ -140,7 +140,7 @@ header FSL_FAKE_HOTMAIL_RVCD X-Spam-Relays-External =~ /mx[1234]\.hotmail\.co # header FSL_HELO_UNKNOWN X-Spam-Relays-External =~ /\bhelo=unkown\b/i # score FSL_HELO_UNKNOWN 0.001 -header FSL_HELO_HOME X-Spam-Relays-External =~ /\bhelo=\S+\.home\b/i +# header FSL_HELO_HOME X-Spam-Relays-External =~ /\bhelo=\S+\.home\b/i # score FSL_HELO_HOME 0.001 header FSL_HELO_SETUP X-Spam-Relays-External =~ /\bhelo=\S+\.setup\b/i diff --git rulesrc/sandbox/smf/20_smf.cf rulesrc/sandbox/smf/20_smf.cf index 6ae88eaa8..58ad698eb 100644 --- rulesrc/sandbox/smf/20_smf.cf +++ rulesrc/sandbox/smf/20_smf.cf @@ -98,6 +98,8 @@ score FSL_RCVD_UT_GT_5 0.01 header __FSL_COUNT_EXTERN X-Spam-Relays-External =~ /\[[^\]]+\]/ tflags __FSL_COUNT_EXTERN multiple +meta FSL_RCVD_EX_0 (__FSL_COUNT_EXTERN == 0) +score FSL_RCVD_EX_0 0.01 meta FSL_RCVD_EX_1 (__FSL_COUNT_EXTERN == 1) score FSL_RCVD_EX_1 0.01 meta FSL_RCVD_EX_2 (__FSL_COUNT_EXTERN == 2) @@ -110,6 +112,9 @@ meta FSL_RCVD_EX_5 (__FSL_COUNT_EXTERN == 5) score FSL_RCVD_EX_5 0.01 meta FSL_RCVD_EX_GT_5 (__FSL_COUNT_EXTERN > 5) +meta FSL_NO_RCVD_1 (__FSL_COUNT_TRUST > 0 && __FSL_COUNT_UNTRUST == 0) +score FSL_NO_RCVD_1 0.01 + rawbody __FSL_HTML_BLOCKS /<((?:div|span))[^>]+>.{1,10}?<\/\1[^>]*>.{0,20}?<\1/si tflags __FSL_HTML_BLOCKS multiple diff --git rulesrc/sandbox/smf/30_smf_nontest.cf rulesrc/sandbox/smf/30_smf_nontest.cf index fbe279fed..f3a8f50b5 100644 --- rulesrc/sandbox/smf/30_smf_nontest.cf +++ rulesrc/sandbox/smf/30_smf_nontest.cf @@ -1,8 +1,9 @@ # Rules that have 
tested OK in mass-checks # and can be considered for promotion. -body FSL_MY_NAME_IS /\bmy name is\b/i -describe FSL_MY_NAME_IS My name is ... +# DISABLED - VERY, VERY PRONE TO FPs +#body FSL_MY_NAME_IS /\bmy name is\b/i +#describe FSL_MY_NAME_IS My name is ... header __FSL_HAS_LIST_UNSUB exists:List-Unsubscribe meta FSL_BULK_SIG ((DCC_CHECK || RAZOR2_CHECK || PYZOR_CHECK) && !__FSL_HAS_LIST_UNSUB) diff --git rulesrc/scores/72_scores.cf rulesrc/scores/72_scores.cf index 23f9fe139..adb82158b 100644 --- rulesrc/scores/72_scores.cf +++ rulesrc/scores/72_scores.cf @@ -1,9 +1,8 @@ -score ACCT_PHISHING 1.000 1.000 1.000 1.000 score AC_BR_BONANZA 0.001 0.001 0.001 0.001 score AC_DIV_BONANZA 0.001 0.001 0.001 0.001 -score AC_HTML_NONSENSE_TAGS 1.000 1.000 1.000 1.000 +score AC_HTML_NONSENSE_TAGS 1.000 0.001 1.000 0.001 score AC_SPAMMY_URI_PATTERNS1 1.000 1.000 1.000 1.000 -score AC_SPAMMY_URI_PATTERNS10 3.999 3.461 3.999 3.461 +score AC_SPAMMY_URI_PATTERNS10 1.000 1.000 1.000 1.000 score AC_SPAMMY_URI_PATTERNS11 1.000 1.000 1.000 1.000 score AC_SPAMMY_URI_PATTERNS12 1.000 1.000 1.000 1.000 score AC_SPAMMY_URI_PATTERNS2 1.000 1.000 1.000 1.000 @@ -11,168 +10,135 @@ score AC_SPAMMY_URI_PATTERNS3 1.000 1.000 1.000 1.000 score AC_SPAMMY_URI_PATTERNS4 1.000 1.000 1.000 1.000 score AC_SPAMMY_URI_PATTERNS8 1.000 1.000 1.000 1.000 score AC_SPAMMY_URI_PATTERNS9 1.000 1.000 1.000 1.000 -score ADMITS_SPAM 2.899 0.001 2.899 0.001 score ADVANCE_FEE_2_NEW_FORM 1.000 1.000 1.000 1.000 -score ADVANCE_FEE_2_NEW_FRM_MNY 0.001 0.001 0.001 0.001 -score ADVANCE_FEE_2_NEW_MONEY 1.999 1.999 1.999 1.999 -score ADVANCE_FEE_3_NEW 3.499 2.496 3.499 2.496 -score ADVANCE_FEE_3_NEW_FRM_MNY 0.249 2.256 0.249 2.256 -score ADVANCE_FEE_3_NEW_MONEY 3.499 3.599 3.499 3.599 -score ADVANCE_FEE_4_NEW 2.699 2.699 2.699 2.699 -score ADVANCE_FEE_4_NEW_FRM_MNY 1.889 1.815 1.889 1.815 -score ADVANCE_FEE_4_NEW_MONEY 1.702 1.789 1.702 1.789 -score ADVANCE_FEE_5_NEW 3.099 2.999 3.099 2.999 -score ADVANCE_FEE_5_NEW_FORM 
1.581 0.001 1.581 0.001 -score ADVANCE_FEE_5_NEW_FRM_MNY 1.485 1.802 1.485 1.802 -score ADVANCE_FEE_5_NEW_MONEY 3.899 3.699 3.899 3.699 -score AD_PREFS 0.001 0.001 0.001 0.001 -score AXB_XMAILER_MIMEOLE_OL_024C2 1.253 0.001 1.253 0.001 -score AXB_XMAILER_MIMEOLE_OL_1ECD5 2.189 1.191 2.189 1.191 -score AXB_XM_FORGED_OL2600 2.152 0.001 2.152 0.001 -score AXB_X_FF_SEZ_S 2.499 0.001 2.499 0.001 -score BIGNUM_EMAILS 2.801 2.999 2.801 2.999 -score BODY_URI_ONLY 0.999 0.673 0.999 0.673 -score CANT_SEE_AD 1.000 1.000 1.000 1.000 -score CK_HELO_DYNAMIC_SPLIT_IP 1.499 1.499 1.499 1.499 -score CK_HELO_GENERIC 0.249 0.250 0.249 0.250 -score COMMENT_GIBBERISH 1.499 1.499 1.499 1.499 -score COMPENSATION 1.499 1.499 1.499 1.499 -score DEAR_BENEFICIARY 3.041 3.499 3.041 3.499 -score DOS_OUTLOOK_TO_MX_IMAGE 2.400 0.001 2.400 0.001 -score DSN_NO_MIMEVERSION 1.999 1.999 1.999 1.999 -score DX_TEXT_05 2.071 1.218 2.071 1.218 -score FBI_MONEY 0.957 1.999 0.957 1.999 -score FBI_SPOOF 1.999 0.001 1.999 0.001 -score FILL_THIS_FORM 0.001 0.001 0.001 0.001 -score FORM_FRAUD 0.999 0.001 0.999 0.001 -score FORM_FRAUD_3 2.398 1.846 2.398 1.846 -score FORM_FRAUD_5 2.948 0.001 2.948 0.001 +score ADVANCE_FEE_2_NEW_MONEY 1.997 0.001 1.997 0.001 +score ADVANCE_FEE_3_NEW 3.496 0.001 3.496 0.001 +score ADVANCE_FEE_3_NEW_MONEY 2.796 0.001 2.796 0.001 +score ADVANCE_FEE_4_NEW 2.596 0.789 2.596 0.789 +score ADVANCE_FEE_5_NEW 2.996 0.001 2.996 0.001 +score AXB_XMAILER_MIMEOLE_OL_024C2 0.367 0.001 0.367 0.001 +score AXB_XMAILER_MIMEOLE_OL_1ECD5 0.181 0.001 0.181 0.001 +score AXB_XM_FORGED_OL2600 1.190 2.699 1.190 2.699 +score BODY_EMPTY 1.997 1.999 1.997 1.999 +score BODY_URI_ONLY 0.998 0.001 0.998 0.001 +score BOGUS_MSM_HDRS 0.909 0.001 0.909 0.001 +score CANT_SEE_AD 2.996 0.500 2.996 0.500 +score CK_HELO_DYNAMIC_SPLIT_IP 1.350 0.001 1.350 0.001 +score CK_HELO_GENERIC 0.249 0.249 0.249 0.249 +score CN_B2B_SPAMMER 0.001 0.001 0.001 0.001 +score COMMENT_GIBBERISH 1.498 1.499 1.498 1.499 +score 
DATE_IN_FUTURE_96_Q 3.296 3.299 3.296 3.299 +score ENCRYPTED_MESSAGE -1.000 -1.000 -1.000 -1.000 +score FBI_MONEY 0.696 0.001 0.696 0.001 +score FBI_SPOOF 1.999 1.999 1.999 1.999 +score FILL_THIS_FORM 2.748 0.001 2.748 0.001 +score FORM_FRAUD 0.998 0.001 0.998 0.001 +score FORM_FRAUD_3 2.696 0.001 2.696 0.001 +score FORM_FRAUD_5 0.209 0.001 0.209 0.001 score FORM_LOW_CONTRAST 1.000 1.000 1.000 1.000 -score FOUND_YOU 3.250 3.249 3.250 3.249 -score FREEMAIL_DOC_PDF_BCC 2.599 2.599 2.599 2.599 -score FREEMAIL_FORGED_FROMDOMAIN 0.250 0.249 0.250 0.249 +score FOUND_YOU 3.013 0.001 3.013 0.001 +score FREEMAIL_DOC_PDF_BCC 2.596 2.599 2.596 2.599 +score FREEMAIL_FORGED_FROMDOMAIN 0.001 0.199 0.001 0.199 +score FROM_IN_TO_AND_SUBJ 0.287 0.262 0.287 0.262 score FROM_MISSPACED 0.001 0.001 0.001 0.001 -score FROM_MISSP_DYNIP 1.826 1.651 1.826 1.651 -score FROM_MISSP_EH_MATCH 0.001 0.326 0.001 0.326 -score FROM_MISSP_FREEMAIL 3.999 0.001 3.999 0.001 +score FROM_MISSP_FREEMAIL 3.595 0.001 3.595 0.001 score FROM_MISSP_MSFT 0.001 0.001 0.001 0.001 -score FROM_MISSP_PHISH 3.089 3.338 3.089 3.338 -score FROM_MISSP_REPLYTO 2.600 0.001 2.600 0.001 +score FROM_MISSP_REPLYTO 0.001 0.001 0.001 0.001 score FROM_MISSP_SPF_FAIL 0.001 1.000 0.001 1.000 -score FROM_MISSP_TO_UNDISC 0.001 0.001 0.001 0.001 +score FROM_MISSP_TO_UNDISC 1.438 0.001 1.438 0.001 score FROM_MISSP_USER 0.001 0.001 0.001 0.001 score FROM_MISSP_XPRIO 0.001 0.001 0.001 0.001 -score FSL_BOTSPAM_1 2.412 0.589 2.412 0.589 +score FROM_WORDY 2.497 0.001 2.497 0.001 +score FROM_WORDY_SHORT 1.000 1.000 1.000 1.000 score FSL_CTYPE_WIN1251 0.001 0.001 0.001 0.001 -score FSL_HELO_BARE_IP_2 1.999 1.999 1.999 1.999 -score FSL_HELO_FAKE 2.908 1.390 2.908 1.390 -score FSL_NEW_HELO_USER 1.999 0.631 1.999 0.631 -score FUZZY_CLICK_HERE 2.499 0.001 2.499 0.001 -score FUZZY_DR_OZ 2.599 2.699 2.599 2.699 +score FSL_HELO_BARE_IP_2 1.498 1.499 1.498 1.499 +score FSL_NEW_HELO_USER 0.083 0.001 0.083 0.001 score GOOGLE_DOCS_PHISH 1.000 1.000 
1.000 1.000 score GOOGLE_DOCS_PHISH_MANY 1.000 1.000 1.000 1.000 score GOOG_MALWARE_DNLD 1.000 1.000 1.000 1.000 +score HDRS_LCASE 0.099 0.100 0.099 0.100 score HEADER_FROM_DIFFERENT_DOMAINS 0.001 0.001 0.001 0.001 -score HEXHASH_WORD 2.600 1.000 2.600 1.000 -score HK_LOTTO 0.511 0.770 0.511 0.770 -score HK_LOTTO_NAME 1.000 0.999 1.000 0.999 -score HK_NAME_FM_MR_MRS 1.499 1.459 1.499 1.459 -score HK_NAME_FROM 1.000 0.985 1.000 0.985 -score HK_NAME_MR_MRS 0.999 0.001 0.999 0.001 -score HK_RANDOM_FROM 1.000 1.000 1.000 1.000 -score HK_RANDOM_REPLYTO 1.000 0.999 1.000 0.999 -score HK_SCAM_N1 2.599 2.699 2.599 2.699 -score HK_SCAM_N13 1.306 2.599 1.306 2.599 -score HK_SCAM_N2 3.499 2.516 3.499 2.516 -score HK_SCAM_N3 2.246 1.116 2.246 1.116 -score HK_SCAM_N8 0.808 1.461 0.808 1.461 -score HK_SPAMMY_FILENAME 2.499 0.001 2.499 0.001 +score HELO_MISC_IP 0.248 0.250 0.248 0.250 +score HEXHASH_WORD 1.000 1.000 1.000 1.000 +score HK_RANDOM_FROM 0.998 0.001 0.998 0.001 +score HK_SCAM_N15 1.935 2.499 1.935 2.499 +score HK_SCAM_N2 3.249 0.001 3.249 0.001 score HTML_OFF_PAGE 1.000 1.000 1.000 1.000 +score IMG_DIRECT_TO_MX 2.397 2.400 2.397 2.400 score LIST_PRTL_PUMPDUMP 1.000 1.000 1.000 1.000 -score LIST_PRTL_SAME_USER 1.000 1.000 1.000 1.000 -score LONG_HEX_URI 2.999 2.737 2.999 2.737 -score LONG_IMG_URI 1.000 1.000 1.000 1.000 +score LIST_PRTL_SAME_USER 0.001 0.286 0.001 0.286 +score LONG_HEX_URI 2.194 2.290 2.194 2.290 +score LONG_IMG_URI 0.553 0.100 0.553 0.100 score LOTS_OF_MONEY 0.001 0.001 0.001 0.001 -score LOTTO_AGENT 2.499 2.499 2.499 2.499 -score LOTTO_DEPT 0.728 0.001 0.728 0.001 +score LOTTO_AGENT 1.498 1.499 1.498 1.499 +score LOTTO_DEPT 0.001 0.001 0.001 0.001 score LUCRATIVE 1.000 1.000 1.000 1.000 -score MAILER_EQ_ORG 2.499 0.001 2.499 0.001 -score MALFORMED_FREEMAIL 2.546 1.429 2.546 1.429 -score MANY_SPAN_IN_TEXT 2.299 0.001 2.299 0.001 -score MIME_NO_TEXT 2.000 1.000 2.000 1.000 -score MONEY_ATM_CARD 2.690 1.476 2.690 1.476 -score MONEY_BARRISTER 0.999 0.999 
0.999 0.999 -score MONEY_FORM 0.552 0.001 0.552 0.001 -score MONEY_FORM_SHORT 2.481 1.260 2.481 1.260 -score MONEY_FRAUD_3 2.718 3.099 2.718 3.099 -score MONEY_FRAUD_5 1.862 0.001 1.862 0.001 -score MONEY_FRAUD_8 2.801 0.504 2.801 0.504 -score MONEY_FROM_41 1.999 0.686 1.999 0.686 -score MONEY_FROM_MISSP 0.001 0.001 0.001 0.001 -score NAME_EMAIL_DIFF 3.899 0.374 3.899 0.374 -score NSL_RCVD_FROM_USER 0.001 0.020 0.001 0.020 -score NSL_RCVD_HELO_USER 0.001 2.523 0.001 2.523 -score OBFU_ATTACH_MISSP 2.227 0.001 2.227 0.001 -score OBFU_TEXT_ATTACH 2.075 0.001 2.075 0.001 -score PDS_FROM_2_EMAILS 4.299 0.675 4.299 0.675 -score PHP_NOVER_MUA 3.499 1.000 3.499 1.000 -score PP_MIME_FAKE_ASCII_TEXT 0.999 0.243 0.999 0.243 +score MIMEOLE_DIRECT_TO_MX 1.445 0.381 1.445 0.381 +score MIME_NO_TEXT 1.000 1.000 1.000 1.000 +score MONEY_FRAUD_3 2.896 0.001 2.896 0.001 +score MONEY_FRAUD_5 3.096 0.001 3.096 0.001 +score MONEY_FRAUD_8 2.548 0.001 2.548 0.001 +score MONEY_LOTTERY 2.498 1.611 2.498 1.611 +score MSGID_NOFQDN1 2.395 3.299 2.395 3.299 +score MSM_PRIO_REPTO 2.497 0.180 2.497 0.180 +score NSL_RCVD_FROM_USER 0.548 0.001 0.548 0.001 +score NSL_RCVD_HELO_USER 1.273 0.001 1.273 0.001 +score PHP_NOVER_MUA 1.000 1.000 1.000 1.000 +score PHP_ORIG_SCRIPT 0.502 2.499 0.502 2.499 +score PHP_SCRIPT_MUA 1.000 1.000 1.000 1.000 +score PP_MIME_FAKE_ASCII_TEXT 0.429 0.001 0.429 0.001 score PP_TOO_MUCH_UNICODE02 0.500 0.500 0.500 0.500 score PP_TOO_MUCH_UNICODE05 1.000 1.000 1.000 1.000 score PUMPDUMP 1.000 1.000 1.000 1.000 score PUMPDUMP_MULTI 1.000 1.000 1.000 1.000 score RAND_HEADER_MANY 1.000 1.000 1.000 1.000 -score RCVD_DBL_DQ 2.599 2.699 2.599 2.699 score RCVD_IN_MSPIKE_BL 0.001 0.010 0.001 0.010 -score RCVD_IN_MSPIKE_H2 0.001 -0.211 0.001 -0.211 +score RCVD_IN_MSPIKE_H2 0.001 -2.800 0.001 -2.800 score RCVD_IN_MSPIKE_H3 0.001 -0.010 0.001 -0.010 score RCVD_IN_MSPIKE_H4 0.001 -0.010 0.001 -0.010 score RCVD_IN_MSPIKE_H5 0.001 -1.000 0.001 -1.000 score RCVD_IN_MSPIKE_L2 0.001 0.001 
0.001 0.001 -score RCVD_IN_MSPIKE_L3 0.001 1.606 0.001 1.606 -score RCVD_IN_MSPIKE_L4 0.001 2.088 0.001 2.088 -score RCVD_IN_MSPIKE_L5 0.001 2.566 0.001 2.566 +score RCVD_IN_MSPIKE_L3 0.001 0.001 0.001 0.001 +score RCVD_IN_MSPIKE_L4 0.001 0.001 0.001 0.001 +score RCVD_IN_MSPIKE_L5 0.001 0.001 0.001 0.001 score RCVD_IN_MSPIKE_WL 0.001 -0.010 0.001 -0.010 score RCVD_IN_MSPIKE_ZBI 0.001 0.001 0.001 0.001 -score RISK_FREE 2.601 3.058 2.601 3.058 -score SERGIO_SUBJECT_PORN014 2.602 0.064 2.602 0.064 -score SERGIO_SUBJECT_VIAGRA01 2.699 2.242 2.699 2.242 -score SHARE_50_50 1.932 0.001 1.932 0.001 -score SHORTENED_URL_SRC 0.814 0.368 0.814 0.368 -score SINGLETS_LOW_CONTRAST 2.086 1.703 2.086 1.703 -score STOCK_LOW_CONTRAST 1.000 1.000 1.000 1.000 -score STYLE_GIBBERISH 2.801 3.499 2.801 3.499 +score RP_MATCHES_RCVD -1.050 -0.001 -1.050 -0.001 +score SHARE_50_50 2.121 1.818 2.121 1.818 +score SPOOFED_FREEM_REPTO 2.498 1.368 2.498 1.368 +score SPOOFED_FREEM_REPTO_CHN 1.000 1.000 1.000 1.000 +score STATIC_XPRIO_OLE 1.997 0.001 1.997 0.001 +score STOCK_LOW_CONTRAST 2.030 2.347 2.030 2.347 +score STOCK_TIP 1.000 1.000 1.000 1.000 +score STYLE_GIBBERISH 2.800 3.093 2.800 3.093 +score SURBL_BLOCKED 0.001 0.001 0.001 0.001 score SYSADMIN 1.000 1.000 1.000 1.000 -score TAB_IN_FROM 0.499 0.499 0.499 0.499 -score THIS_AD 2.002 1.673 2.002 1.673 -score TO_EQ_FM_DIRECT_MX 3.050 0.449 3.050 0.449 +score THIS_AD 0.596 2.200 0.596 2.200 +score TO_EQ_FM_DIRECT_MX 2.497 0.622 2.497 0.622 score TO_EQ_FM_DOM_SPF_FAIL 0.001 0.001 0.001 0.001 score TO_EQ_FM_SPF_FAIL 0.001 0.001 0.001 0.001 -score TO_IN_SUBJ 0.099 0.100 0.099 0.100 -score TO_NO_BRKTS_FROM_MSSP 2.499 0.699 2.499 0.699 -score TO_NO_BRKTS_HTML_IMG 1.911 1.483 1.911 1.483 -score TO_NO_BRKTS_HTML_ONLY 1.800 0.591 1.800 0.591 -score TO_NO_BRKTS_MSFT 2.499 2.500 2.499 2.500 -score TO_NO_BRKTS_NORDNS_HTML 0.001 0.001 0.001 0.001 -score TO_NO_BRKTS_PCNT 2.500 1.137 2.500 1.137 -score TVD_PH_BODY_META 0.884 1.984 0.884 1.984 -score 
TVD_SPACE_ENCODED 2.499 2.499 2.499 2.499 -score TVD_SPACE_ENC_FM_MIME 1.999 1.999 1.999 1.999 -score TVD_SPACE_RATIO_MINFP 2.749 2.749 2.749 2.749 +score TO_IN_SUBJ 0.099 0.099 0.099 0.099 +score TO_NO_BRKTS_FROM_MSSP 0.001 0.001 0.001 0.001 +score TO_NO_BRKTS_HTML_IMG 0.001 2.000 0.001 2.000 +score TO_NO_BRKTS_HTML_ONLY 1.997 0.001 1.997 0.001 +score TO_NO_BRKTS_MSFT 2.497 0.001 2.497 0.001 +score TO_NO_BRKTS_NORDNS_HTML 0.398 0.001 0.398 0.001 +score TO_NO_BRKTS_PCNT 2.497 0.001 2.497 0.001 +score TVD_SPACE_ENCODED 2.497 0.001 2.497 0.001 +score TVD_SPACE_ENC_FM_MIME 1.997 0.001 1.997 0.001 +score TVD_SPACE_RATIO_MINFP 2.497 0.001 2.497 0.001 score TW_GIBBERISH_MANY 1.000 1.000 1.000 1.000 score UC_GIBBERISH_OBFU 1.000 1.000 1.000 1.000 -score URI_DQ_UNSUB 2.499 2.599 2.499 2.599 -score URI_GOOGLE_PROXY 2.299 0.001 2.299 0.001 -score URI_ONLY_MSGID_MALF 0.403 2.002 0.403 2.002 -score URI_OPTOUT_3LD 2.000 0.001 2.000 0.001 -score URI_OPTOUT_USME 2.999 0.001 2.999 0.001 -score URI_PHISH 1.400 0.891 1.400 0.891 -score URI_TRY_3LD 0.001 0.001 0.001 0.001 -score URI_TRY_USME 2.999 0.001 2.999 0.001 -score URI_WPADMIN 2.599 2.699 2.599 2.699 -score URI_WP_DIRINDEX 2.873 3.000 2.873 3.000 -score URI_WP_HACKED 2.999 2.999 2.999 2.999 -score URI_WP_HACKED_2 1.999 1.999 1.999 1.999 -score XM_PHPMAILER_FORGED 2.399 0.001 2.399 0.001 -score XPRIO 0.999 0.999 0.999 0.999 -score YOU_INHERIT 2.750 2.900 2.750 2.900 +score URI_DATA 1.000 1.000 1.000 1.000 +score URI_GOOGLE_PROXY 0.710 1.378 0.710 1.378 +score URI_ONLY_MSGID_MALF 0.001 1.191 0.001 1.191 +score URI_OPTOUT_3LD 1.000 1.000 1.000 1.000 +score URI_PHISH 3.995 3.999 3.995 3.999 +score URI_TRY_3LD 0.195 0.001 0.195 0.001 +score URI_TRY_USME 0.001 0.001 0.001 0.001 +score URI_WPADMIN 3.396 3.014 3.396 3.014 +score URI_WP_DIRINDEX 1.000 1.000 1.000 1.000 +score URI_WP_HACKED 2.996 3.000 2.996 3.000 +score URI_WP_HACKED_2 1.187 1.764 1.187 1.764 +score XPRIO 2.248 2.249 2.248 2.249 +score XPRIO_SHORT_SUBJ 1.000 1.000 
1.000 1.000 diff --git rulesrc/scores/scores-set0 rulesrc/scores/scores-set0 index f1954ab3e..cac8c40c6 100644 --- rulesrc/scores/scores-set0 +++ rulesrc/scores/scores-set0 @@ -1,142 +1,98 @@ -# Using score set 0 logs for revision 1675274 from: -# ham-axb-8mile.log ham-axb-coi-bulk.log ham-axb-generic.log ham-axb-ham-misc.log ham-bernie-fsf.log ham-bernie-it_batt.log ham-bernie-mix.log ham-bpoliakoff.log ham-darxus.log ham-dwarren.log ham-grenier.log ham-jarif.log ham-kam-ninja.log ham-kpg-core.log ham-mas-cps.log ham-mas-mas.log ham-mmiroslaw-ham.log ham-mmiroslaw-spam.log ham-zmi.log spam-axb-8mile.log spam-axb-coi-bulk.log spam-axb-generic.log spam-axb-ham-misc.log spam-bernie-fsf.log spam-bernie-it_batt.log spam-bernie-mix.log spam-bpoliakoff.log spam-darxus.log spam-dwarren.log spam-grenier.log spam-jarif.log spam-kam-ninja.log spam-kpg-core.log spam-mas-cps.log spam-mas-mas.log spam-mmiroslaw-ham.log spam-mmiroslaw-spam.log spam-zmi.log +# Using score set 0 logs for revision 1786853 from: +# ham-axb-coi-bulk.log ham-axb-generic.log ham-axb-ham-misc.log ham-axb-ninja.log ham-darxus.log ham-ena.log ham-grenier.log ham-jarif.log ham-kgolding.log ham-thendrikx.log ham-zmi.log spam-axb-coi-bulk.log spam-axb-generic.log spam-axb-ham-misc.log spam-axb-ninja.log spam-darxus.log spam-ena.log spam-grenier.log spam-jarif.log spam-kgolding.log spam-thendrikx.log spam-zmi.log score AC_BR_BONANZA 0.001 -score AC_DIV_BONANZA 0.001 # force non-zero -score AC_SPAMMY_URI_PATTERNS10 3.999 -score ADMITS_SPAM 2.899 -score ADVANCE_FEE_2_NEW_FRM_MNY 0.001 -score ADVANCE_FEE_2_NEW_MONEY 1.999 -score ADVANCE_FEE_3_NEW 3.499 -score ADVANCE_FEE_3_NEW_FRM_MNY 0.249 -score ADVANCE_FEE_3_NEW_MONEY 3.499 -score ADVANCE_FEE_4_NEW 2.699 -score ADVANCE_FEE_4_NEW_FRM_MNY 1.889 -score ADVANCE_FEE_4_NEW_MONEY 1.702 -score ADVANCE_FEE_5_NEW 3.099 -score ADVANCE_FEE_5_NEW_FORM 1.581 -score ADVANCE_FEE_5_NEW_FRM_MNY 1.485 -score ADVANCE_FEE_5_NEW_MONEY 3.899 -score AD_PREFS 0.001 -score 
AXB_XMAILER_MIMEOLE_OL_024C2 1.253 -score AXB_XMAILER_MIMEOLE_OL_1ECD5 2.189 -score AXB_XM_FORGED_OL2600 2.152 -score AXB_X_FF_SEZ_S 2.499 -score BIGNUM_EMAILS 2.801 -score BODY_URI_ONLY 0.999 -score CK_HELO_DYNAMIC_SPLIT_IP 1.499 +score AC_DIV_BONANZA 0.001 +score ADVANCE_FEE_2_NEW_MONEY 1.997 +score ADVANCE_FEE_3_NEW 3.496 +score ADVANCE_FEE_3_NEW_MONEY 2.796 +score ADVANCE_FEE_4_NEW 2.596 +score ADVANCE_FEE_5_NEW 2.996 +score AXB_XMAILER_MIMEOLE_OL_024C2 0.367 +score AXB_XMAILER_MIMEOLE_OL_1ECD5 0.181 +score AXB_XM_FORGED_OL2600 1.190 +score BODY_EMPTY 1.997 +score BODY_URI_ONLY 0.998 +score BOGUS_MSM_HDRS 0.909 +score CANT_SEE_AD 2.996 +score CK_HELO_DYNAMIC_SPLIT_IP 1.350 score CK_HELO_GENERIC 0.249 -score COMMENT_GIBBERISH 1.499 -score COMPENSATION 1.499 -score DEAR_BENEFICIARY 3.041 -score DOS_OUTLOOK_TO_MX_IMAGE 2.400 -score DSN_NO_MIMEVERSION 1.999 -score DX_TEXT_05 2.071 -score FBI_MONEY 0.957 +score COMMENT_GIBBERISH 1.498 +score DATE_IN_FUTURE_96_Q 3.296 +score FBI_MONEY 0.696 score FBI_SPOOF 1.999 -score FILL_THIS_FORM 0.001 -score FORM_FRAUD 0.999 -score FORM_FRAUD_3 2.398 -score FORM_FRAUD_5 2.948 -score FOUND_YOU 3.250 -score FREEMAIL_DOC_PDF_BCC 2.599 -score FREEMAIL_FORGED_FROMDOMAIN 0.250 +score FILL_THIS_FORM 2.748 +score FORM_FRAUD 0.998 +score FORM_FRAUD_3 2.696 +score FORM_FRAUD_5 0.209 +score FOUND_YOU 3.013 +score FREEMAIL_DOC_PDF_BCC 2.596 +score FREEMAIL_FORGED_FROMDOMAIN 0.001 +score FROM_IN_TO_AND_SUBJ 0.287 score FROM_MISSPACED 0.001 -score FROM_MISSP_DYNIP 1.826 -score FROM_MISSP_EH_MATCH 0.001 -score FROM_MISSP_FREEMAIL 3.999 -score FROM_MISSP_MSFT 0.001 -score FROM_MISSP_PHISH 3.089 -score FROM_MISSP_REPLYTO 2.600 -score FROM_MISSP_TO_UNDISC 0.001 # force non-zero +score FROM_MISSP_FREEMAIL 3.595 +score FROM_MISSP_MSFT 0.001 # force non-zero +score FROM_MISSP_REPLYTO 0.001 +score FROM_MISSP_TO_UNDISC 1.438 score FROM_MISSP_USER 0.001 score FROM_MISSP_XPRIO 0.001 -score FSL_BOTSPAM_1 2.412 +score FROM_WORDY 2.497 score 
FSL_CTYPE_WIN1251 0.001 -score FSL_HELO_BARE_IP_2 1.999 -score FSL_HELO_FAKE 2.908 -score FSL_NEW_HELO_USER 1.999 -score FUZZY_CLICK_HERE 2.499 -score FUZZY_DR_OZ 2.599 +score FSL_HELO_BARE_IP_2 1.498 +score FSL_NEW_HELO_USER 0.083 +score HDRS_LCASE 0.099 score HEADER_FROM_DIFFERENT_DOMAINS 0.001 # force non-zero -score HEXHASH_WORD 2.600 -score HK_LOTTO 0.511 -score HK_LOTTO_NAME 1.000 -score HK_NAME_FM_MR_MRS 1.499 -score HK_NAME_MR_MRS 0.999 -score HK_RANDOM_FROM 1.000 -score HK_RANDOM_REPLYTO 1.000 -score HK_SCAM_N1 2.599 -score HK_SCAM_N13 1.306 -score HK_SCAM_N2 3.499 -score HK_SCAM_N3 2.246 -score HK_SCAM_N8 0.808 -score HK_SPAMMY_FILENAME 2.499 -score LONG_HEX_URI 2.999 +score HELO_MISC_IP 0.248 +score HK_RANDOM_FROM 0.998 +score HK_SCAM_N15 1.935 +score HK_SCAM_N2 3.249 +score IMG_DIRECT_TO_MX 2.397 +score LIST_PRTL_SAME_USER 0.001 +score LONG_HEX_URI 2.194 +score LONG_IMG_URI 0.553 score LOTS_OF_MONEY 0.001 # force non-zero -score LOTTO_AGENT 2.499 -score LOTTO_DEPT 0.728 -score MAILER_EQ_ORG 2.499 -score MALFORMED_FREEMAIL 2.546 -score MANY_SPAN_IN_TEXT 2.299 -score MIME_NO_TEXT 2.000 -score MONEY_ATM_CARD 2.690 -score MONEY_BARRISTER 0.999 -score MONEY_FORM 0.552 -score MONEY_FORM_SHORT 2.481 -score MONEY_FRAUD_3 2.718 -score MONEY_FRAUD_5 1.862 -score MONEY_FRAUD_8 2.801 -score MONEY_FROM_41 1.999 -score MONEY_FROM_MISSP 0.001 -score NAME_EMAIL_DIFF 3.899 -score NSL_RCVD_FROM_USER 0.001 -score NSL_RCVD_HELO_USER 0.001 -score OBFU_ATTACH_MISSP 2.227 -score OBFU_TEXT_ATTACH 2.075 -score PDS_FROM_2_EMAILS 4.299 -score PHP_NOVER_MUA 3.499 -score PP_MIME_FAKE_ASCII_TEXT 0.999 -score RCVD_DBL_DQ 2.599 -score RISK_FREE 2.601 -score SERGIO_SUBJECT_PORN014 2.602 -score SERGIO_SUBJECT_VIAGRA01 2.699 -score SHARE_50_50 1.932 -score SHORTENED_URL_SRC 0.814 -score SINGLETS_LOW_CONTRAST 2.086 -score STYLE_GIBBERISH 2.801 -score TAB_IN_FROM 0.499 -score THIS_AD 2.002 -score TO_EQ_FM_DIRECT_MX 3.050 +score LOTTO_AGENT 1.498 +score LOTTO_DEPT 0.001 +score 
MIMEOLE_DIRECT_TO_MX 1.445 +score MONEY_FRAUD_3 2.896 +score MONEY_FRAUD_5 3.096 +score MONEY_FRAUD_8 2.548 +score MONEY_LOTTERY 2.498 +score MSGID_NOFQDN1 2.395 +score MSM_PRIO_REPTO 2.497 +score NSL_RCVD_FROM_USER 0.548 +score NSL_RCVD_HELO_USER 1.273 +score PHP_ORIG_SCRIPT 0.502 +score PP_MIME_FAKE_ASCII_TEXT 0.429 +score RP_MATCHES_RCVD -1.050 +score SHARE_50_50 2.121 +score SPOOFED_FREEM_REPTO 2.498 +score STATIC_XPRIO_OLE 1.997 +score STOCK_LOW_CONTRAST 2.030 +score STYLE_GIBBERISH 2.800 +score THIS_AD 0.596 +score TO_EQ_FM_DIRECT_MX 2.497 score TO_IN_SUBJ 0.099 -score TO_NO_BRKTS_FROM_MSSP 2.499 -score TO_NO_BRKTS_HTML_IMG 1.911 -score TO_NO_BRKTS_HTML_ONLY 1.800 -score TO_NO_BRKTS_MSFT 2.499 -score TO_NO_BRKTS_NORDNS_HTML 0.001 -score TO_NO_BRKTS_PCNT 2.500 -score TVD_PH_BODY_META 0.884 -score TVD_SPACE_ENCODED 2.499 -score TVD_SPACE_ENC_FM_MIME 1.999 -score TVD_SPACE_RATIO_MINFP 2.749 -score URI_DQ_UNSUB 2.499 -score URI_GOOGLE_PROXY 2.299 -score URI_ONLY_MSGID_MALF 0.403 -score URI_OPTOUT_3LD 2.000 -score URI_OPTOUT_USME 2.999 -score URI_PHISH 1.400 -score URI_TRY_3LD 0.001 -score URI_TRY_USME 2.999 -score URI_WPADMIN 2.599 -score URI_WP_DIRINDEX 2.873 -score URI_WP_HACKED 2.999 -score URI_WP_HACKED_2 1.999 -score XM_PHPMAILER_FORGED 2.399 -score XPRIO 0.999 -score YOU_INHERIT 2.750 -score ACCT_PHISHING 1.000 +score TO_NO_BRKTS_FROM_MSSP 0.001 +score TO_NO_BRKTS_HTML_IMG 0.001 +score TO_NO_BRKTS_HTML_ONLY 1.997 +score TO_NO_BRKTS_MSFT 2.497 +score TO_NO_BRKTS_NORDNS_HTML 0.398 +score TO_NO_BRKTS_PCNT 2.497 +score TVD_SPACE_ENCODED 2.497 +score TVD_SPACE_ENC_FM_MIME 1.997 +score TVD_SPACE_RATIO_MINFP 2.497 +score URI_GOOGLE_PROXY 0.710 +score URI_ONLY_MSGID_MALF 0.001 # force non-zero +score URI_PHISH 3.995 +score URI_TRY_3LD 0.195 +score URI_WPADMIN 3.396 +score URI_WP_HACKED 2.996 +score URI_WP_HACKED_2 1.187 +score XPRIO 2.248 score AC_HTML_NONSENSE_TAGS 1.000 score AC_SPAMMY_URI_PATTERNS1 1.000 +score AC_SPAMMY_URI_PATTERNS10 1.000 score 
AC_SPAMMY_URI_PATTERNS11 1.000 score AC_SPAMMY_URI_PATTERNS12 1.000 score AC_SPAMMY_URI_PATTERNS2 1.000 @@ -145,26 +101,33 @@ score AC_SPAMMY_URI_PATTERNS4 1.000 score AC_SPAMMY_URI_PATTERNS8 1.000 score AC_SPAMMY_URI_PATTERNS9 1.000 score ADVANCE_FEE_2_NEW_FORM 1.000 -score CANT_SEE_AD 1.000 +score ENCRYPTED_MESSAGE -1.000 score FORM_LOW_CONTRAST 1.000 +score FROM_WORDY_SHORT 1.000 score GOOGLE_DOCS_PHISH 1.000 score GOOGLE_DOCS_PHISH_MANY 1.000 score GOOG_MALWARE_DNLD 1.000 -score HK_NAME_FROM 1.000 +score HEXHASH_WORD 1.000 score HTML_OFF_PAGE 1.000 score LIST_PRTL_PUMPDUMP 1.000 -score LIST_PRTL_SAME_USER 1.000 -score LONG_IMG_URI 1.000 score LUCRATIVE 1.000 +score MIME_NO_TEXT 1.000 +score PHP_NOVER_MUA 1.000 +score PHP_SCRIPT_MUA 1.000 score PP_TOO_MUCH_UNICODE02 0.500 score PP_TOO_MUCH_UNICODE05 1.000 score PUMPDUMP 1.000 score PUMPDUMP_MULTI 1.000 score RAND_HEADER_MANY 1.000 -score STOCK_LOW_CONTRAST 1.000 +score SPOOFED_FREEM_REPTO_CHN 1.000 +score STOCK_TIP 1.000 score SYSADMIN 1.000 score TW_GIBBERISH_MANY 1.000 score UC_GIBBERISH_OBFU 1.000 +score URI_DATA 1.000 +score URI_OPTOUT_3LD 1.000 +score URI_WP_DIRINDEX 1.000 +score XPRIO_SHORT_SUBJ 1.000 # in active.list but have no hits in recent corpus score FROM_MISSP_SPF_FAIL 0.001 # force non-zero score RCVD_IN_MSPIKE_BL 0.001 # force non-zero @@ -178,5 +141,6 @@ score RCVD_IN_MSPIKE_L4 0.001 # force non-zero score RCVD_IN_MSPIKE_L5 0.001 # force non-zero score RCVD_IN_MSPIKE_WL 0.001 # force non-zero score RCVD_IN_MSPIKE_ZBI 0.001 # force non-zero +score SURBL_BLOCKED 0.001 # force non-zero score TO_EQ_FM_DOM_SPF_FAIL 0.001 # force non-zero score TO_EQ_FM_SPF_FAIL 0.001 # force non-zero diff --git rulesrc/scores/scores-set1 rulesrc/scores/scores-set1 index 14c21518a..862b74f9b 100644 --- rulesrc/scores/scores-set1 +++ rulesrc/scores/scores-set1 @@ -1,133 +1,104 @@ -# Using score set 1 logs for revision 1674464 from: -# ham-net-axb-8mile.log ham-net-axb-coi-bulk.log ham-net-axb-generic.log 
ham-net-axb-ham-misc.log ham-net-bernie-fsf.log ham-net-bernie-it_batt.log ham-net-bernie-mix.log ham-net-bpoliakoff.log ham-net-darxus.log ham-net-dwarren.log ham-net-grenier.log ham-net-jarif.log ham-net-kam-ninja.log ham-net-kpg-core.log ham-net-mas-cps.log ham-net-mas-mas.log ham-net-zmi.log spam-net-axb-8mile.log spam-net-axb-coi-bulk.log spam-net-axb-generic.log spam-net-axb-ham-misc.log spam-net-bernie-fsf.log spam-net-bernie-it_batt.log spam-net-bernie-mix.log spam-net-bpoliakoff.log spam-net-darxus.log spam-net-dwarren.log spam-net-grenier.log spam-net-jarif.log spam-net-kam-ninja.log spam-net-kpg-core.log spam-net-mas-cps.log spam-net-mas-mas.log spam-net-zmi.log +# Using score set 1 logs for revision 1786494 from: +# ham-net-axb-coi-bulk.log ham-net-axb-generic.log ham-net-axb-ham-misc.log ham-net-axb-ninja.log ham-net-bb-guenther_fraud.log ham-net-darxus.log ham-net-dwarren.log ham-net-ena.log ham-net-grenier.log ham-net-jarif.log ham-net-kgolding.log ham-net-thendrikx.log ham-net-zmi.log spam-net-axb-coi-bulk.log spam-net-axb-generic.log spam-net-axb-ham-misc.log spam-net-axb-ninja.log spam-net-bb-guenther_fraud.log spam-net-darxus.log spam-net-dwarren.log spam-net-ena.log spam-net-grenier.log spam-net-jarif.log spam-net-kgolding.log spam-net-thendrikx.log spam-net-zmi.log -score AC_BR_BONANZA 0.001 -score AC_DIV_BONANZA 0.001 # force non-zero -score AC_SPAMMY_URI_PATTERNS10 3.461 -score ADMITS_SPAM 0.001 # force non-zero -score ADVANCE_FEE_2_NEW_FRM_MNY 0.001 # force non-zero -score ADVANCE_FEE_2_NEW_MONEY 1.999 -score ADVANCE_FEE_3_NEW 2.496 -score ADVANCE_FEE_3_NEW_FRM_MNY 2.256 -score ADVANCE_FEE_3_NEW_MONEY 3.599 -score ADVANCE_FEE_4_NEW 2.699 -score ADVANCE_FEE_4_NEW_FRM_MNY 1.815 -score ADVANCE_FEE_4_NEW_MONEY 1.789 -score ADVANCE_FEE_5_NEW 2.999 -score ADVANCE_FEE_5_NEW_FRM_MNY 1.802 -score ADVANCE_FEE_5_NEW_MONEY 3.699 -score AXB_XMAILER_MIMEOLE_OL_024C2 0.001 # force non-zero -score AXB_XMAILER_MIMEOLE_OL_1ECD5 1.191 -score BIGNUM_EMAILS 
2.999 -score BODY_URI_ONLY 0.673 -score CK_HELO_DYNAMIC_SPLIT_IP 1.499 -score CK_HELO_GENERIC 0.250 +score AC_BR_BONANZA 0.001 # force non-zero +score AC_DIV_BONANZA 0.001 +score AC_HTML_NONSENSE_TAGS 0.001 +score ADVANCE_FEE_2_NEW_MONEY 0.001 +score ADVANCE_FEE_3_NEW 0.001 # force non-zero +score ADVANCE_FEE_3_NEW_MONEY 0.001 +score ADVANCE_FEE_4_NEW 0.789 +score ADVANCE_FEE_5_NEW 0.001 # force non-zero +score AXB_XMAILER_MIMEOLE_OL_024C2 0.001 +score AXB_XMAILER_MIMEOLE_OL_1ECD5 0.001 +score AXB_XM_FORGED_OL2600 2.699 +score BODY_EMPTY 1.999 +score BODY_URI_ONLY 0.001 # force non-zero +score BOGUS_MSM_HDRS 0.001 +score CANT_SEE_AD 0.500 +score CK_HELO_DYNAMIC_SPLIT_IP 0.001 +score CK_HELO_GENERIC 0.249 +score CN_B2B_SPAMMER 0.001 score COMMENT_GIBBERISH 1.499 -score COMPENSATION 1.499 -score DEAR_BENEFICIARY 3.499 -score DSN_NO_MIMEVERSION 1.999 -score DX_TEXT_05 1.218 -score FBI_MONEY 1.999 -score FBI_SPOOF 0.001 +score DATE_IN_FUTURE_96_Q 3.299 +score FBI_MONEY 0.001 +score FBI_SPOOF 1.999 score FILL_THIS_FORM 0.001 -score FORM_FRAUD 0.001 # force non-zero -score FORM_FRAUD_3 1.846 +score FORM_FRAUD 0.001 +score FORM_FRAUD_3 0.001 score FORM_FRAUD_5 0.001 -score FOUND_YOU 3.249 +score FOUND_YOU 0.001 score FREEMAIL_DOC_PDF_BCC 2.599 -score FREEMAIL_FORGED_FROMDOMAIN 0.249 +score FREEMAIL_FORGED_FROMDOMAIN 0.199 +score FROM_IN_TO_AND_SUBJ 0.262 score FROM_MISSPACED 0.001 -score FROM_MISSP_DYNIP 1.651 -score FROM_MISSP_EH_MATCH 0.326 score FROM_MISSP_FREEMAIL 0.001 -score FROM_MISSP_MSFT 0.001 -score FROM_MISSP_PHISH 3.338 -score FROM_MISSP_REPLYTO 0.001 # force non-zero +score FROM_MISSP_MSFT 0.001 # force non-zero +score FROM_MISSP_REPLYTO 0.001 score FROM_MISSP_TO_UNDISC 0.001 score FROM_MISSP_USER 0.001 score FROM_MISSP_XPRIO 0.001 -score FSL_BOTSPAM_1 0.589 +score FROM_WORDY 0.001 score FSL_CTYPE_WIN1251 0.001 -score FSL_HELO_BARE_IP_2 1.999 -score FSL_HELO_FAKE 1.390 -score FSL_NEW_HELO_USER 0.631 -score FUZZY_DR_OZ 2.699 +score FSL_HELO_BARE_IP_2 1.499 
+score FSL_NEW_HELO_USER 0.001 +score HDRS_LCASE 0.100 score HEADER_FROM_DIFFERENT_DOMAINS 0.001 # force non-zero -score HK_LOTTO 0.770 -score HK_LOTTO_NAME 0.999 -score HK_NAME_FM_MR_MRS 1.459 -score HK_NAME_FROM 0.985 -score HK_NAME_MR_MRS 0.001 # force non-zero -score HK_RANDOM_FROM 1.000 -score HK_RANDOM_REPLYTO 0.999 -score HK_SCAM_N1 2.699 -score HK_SCAM_N13 2.599 -score HK_SCAM_N2 2.516 -score HK_SCAM_N3 1.116 -score HK_SCAM_N8 1.461 -score LONG_HEX_URI 2.737 +score HELO_MISC_IP 0.250 +score HK_RANDOM_FROM 0.001 # force non-zero +score HK_SCAM_N15 2.499 +score HK_SCAM_N2 0.001 # force non-zero +score IMG_DIRECT_TO_MX 2.400 +score LIST_PRTL_SAME_USER 0.286 +score LONG_HEX_URI 2.290 +score LONG_IMG_URI 0.100 score LOTS_OF_MONEY 0.001 # force non-zero -score LOTTO_AGENT 2.499 -score LOTTO_DEPT 0.001 # force non-zero -score MALFORMED_FREEMAIL 1.429 -score MONEY_ATM_CARD 1.476 -score MONEY_BARRISTER 0.999 -score MONEY_FORM 0.001 -score MONEY_FORM_SHORT 1.260 -score MONEY_FRAUD_3 3.099 +score LOTTO_AGENT 1.499 +score LOTTO_DEPT 0.001 +score MIMEOLE_DIRECT_TO_MX 0.381 +score MONEY_FRAUD_3 0.001 score MONEY_FRAUD_5 0.001 -score MONEY_FRAUD_8 0.504 -score MONEY_FROM_41 0.686 -score MONEY_FROM_MISSP 0.001 -score NAME_EMAIL_DIFF 0.374 -score NSL_RCVD_FROM_USER 0.020 -score NSL_RCVD_HELO_USER 2.523 -score PDS_FROM_2_EMAILS 0.675 -score PP_MIME_FAKE_ASCII_TEXT 0.243 -score RCVD_DBL_DQ 2.699 -score RCVD_IN_MSPIKE_H2 -0.211 -score RCVD_IN_MSPIKE_L3 1.606 -score RCVD_IN_MSPIKE_L4 2.088 -score RCVD_IN_MSPIKE_L5 2.566 -score RCVD_IN_MSPIKE_ZBI 0.001 # force non-zero -score RISK_FREE 3.058 -score SERGIO_SUBJECT_PORN014 0.064 -score SERGIO_SUBJECT_VIAGRA01 2.242 -score SHORTENED_URL_SRC 0.368 -score SINGLETS_LOW_CONTRAST 1.703 -score STYLE_GIBBERISH 3.499 -score TAB_IN_FROM 0.499 -score THIS_AD 1.673 -score TO_EQ_FM_DIRECT_MX 0.449 -score TO_EQ_FM_DOM_SPF_FAIL 0.001 # force non-zero -score TO_EQ_FM_SPF_FAIL 0.001 -score TO_IN_SUBJ 0.100 -score TO_NO_BRKTS_FROM_MSSP 0.699 -score 
TO_NO_BRKTS_HTML_IMG 1.483 -score TO_NO_BRKTS_HTML_ONLY 0.591 -score TO_NO_BRKTS_MSFT 2.500 +score MONEY_FRAUD_8 0.001 # force non-zero +score MONEY_LOTTERY 1.611 +score MSGID_NOFQDN1 3.299 +score MSM_PRIO_REPTO 0.180 +score NSL_RCVD_FROM_USER 0.001 +score NSL_RCVD_HELO_USER 0.001 +score PHP_ORIG_SCRIPT 2.499 +score PP_MIME_FAKE_ASCII_TEXT 0.001 +score RCVD_IN_MSPIKE_H2 -2.800 +score RCVD_IN_MSPIKE_L3 0.001 # force non-zero +score RCVD_IN_MSPIKE_L4 0.001 +score RCVD_IN_MSPIKE_L5 0.001 +score RP_MATCHES_RCVD -0.001 +score SHARE_50_50 1.818 +score SPOOFED_FREEM_REPTO 1.368 +score STATIC_XPRIO_OLE 0.001 +score STOCK_LOW_CONTRAST 2.347 +score STYLE_GIBBERISH 3.093 +score THIS_AD 2.200 +score TO_EQ_FM_DIRECT_MX 0.622 +score TO_IN_SUBJ 0.099 +score TO_NO_BRKTS_FROM_MSSP 0.001 +score TO_NO_BRKTS_HTML_IMG 2.000 +score TO_NO_BRKTS_HTML_ONLY 0.001 +score TO_NO_BRKTS_MSFT 0.001 score TO_NO_BRKTS_NORDNS_HTML 0.001 -score TO_NO_BRKTS_PCNT 1.137 -score TVD_PH_BODY_META 1.984 -score TVD_SPACE_ENCODED 2.499 -score TVD_SPACE_ENC_FM_MIME 1.999 -score TVD_SPACE_RATIO_MINFP 2.749 -score URI_DQ_UNSUB 2.599 -score URI_ONLY_MSGID_MALF 2.002 -score URI_OPTOUT_3LD 0.001 # force non-zero -score URI_OPTOUT_USME 0.001 -score URI_PHISH 0.891 -score URI_TRY_3LD 0.001 -score URI_TRY_USME 0.001 -score URI_WPADMIN 2.699 -score URI_WP_DIRINDEX 3.000 -score URI_WP_HACKED 2.999 -score URI_WP_HACKED_2 1.999 -score XPRIO 0.999 -score YOU_INHERIT 2.900 -score ACCT_PHISHING 1.000 -score AC_HTML_NONSENSE_TAGS 1.000 +score TO_NO_BRKTS_PCNT 0.001 +score TVD_SPACE_ENCODED 0.001 # force non-zero +score TVD_SPACE_ENC_FM_MIME 0.001 +score TVD_SPACE_RATIO_MINFP 0.001 +score URI_GOOGLE_PROXY 1.378 +score URI_ONLY_MSGID_MALF 1.191 +score URI_PHISH 3.999 +score URI_TRY_3LD 0.001 # force non-zero +score URI_TRY_USME 0.001 # force non-zero +score URI_WPADMIN 3.014 +score URI_WP_HACKED 3.000 +score URI_WP_HACKED_2 1.764 +score XPRIO 2.249 score AC_SPAMMY_URI_PATTERNS1 1.000 +score AC_SPAMMY_URI_PATTERNS10 1.000 score 
AC_SPAMMY_URI_PATTERNS11 1.000 score AC_SPAMMY_URI_PATTERNS12 1.000 score AC_SPAMMY_URI_PATTERNS2 1.000 @@ -136,20 +107,20 @@ score AC_SPAMMY_URI_PATTERNS4 1.000 score AC_SPAMMY_URI_PATTERNS8 1.000 score AC_SPAMMY_URI_PATTERNS9 1.000 score ADVANCE_FEE_2_NEW_FORM 1.000 -score CANT_SEE_AD 1.000 +score ENCRYPTED_MESSAGE -1.000 score FORM_LOW_CONTRAST 1.000 score FROM_MISSP_SPF_FAIL 1.000 +score FROM_WORDY_SHORT 1.000 score GOOGLE_DOCS_PHISH 1.000 score GOOGLE_DOCS_PHISH_MANY 1.000 score GOOG_MALWARE_DNLD 1.000 score HEXHASH_WORD 1.000 score HTML_OFF_PAGE 1.000 score LIST_PRTL_PUMPDUMP 1.000 -score LIST_PRTL_SAME_USER 1.000 -score LONG_IMG_URI 1.000 score LUCRATIVE 1.000 score MIME_NO_TEXT 1.000 score PHP_NOVER_MUA 1.000 +score PHP_SCRIPT_MUA 1.000 score PP_TOO_MUCH_UNICODE02 0.500 score PP_TOO_MUCH_UNICODE05 1.000 score PUMPDUMP 1.000 @@ -160,7 +131,12 @@ score RCVD_IN_MSPIKE_H3 -0.010 score RCVD_IN_MSPIKE_H4 -0.010 score RCVD_IN_MSPIKE_H5 -1.000 score RCVD_IN_MSPIKE_WL -0.010 -score STOCK_LOW_CONTRAST 1.000 +score SPOOFED_FREEM_REPTO_CHN 1.000 +score STOCK_TIP 1.000 score SYSADMIN 1.000 score TW_GIBBERISH_MANY 1.000 score UC_GIBBERISH_OBFU 1.000 +score URI_DATA 1.000 +score URI_OPTOUT_3LD 1.000 +score URI_WP_DIRINDEX 1.000 +score XPRIO_SHORT_SUBJ 1.000 diff --git rulesrc/scores/scores-set2 rulesrc/scores/scores-set2 index f1954ab3e..cac8c40c6 100644 --- rulesrc/scores/scores-set2 +++ rulesrc/scores/scores-set2 @@ -1,142 +1,98 @@ -# Using score set 0 logs for revision 1675274 from: -# ham-axb-8mile.log ham-axb-coi-bulk.log ham-axb-generic.log ham-axb-ham-misc.log ham-bernie-fsf.log ham-bernie-it_batt.log ham-bernie-mix.log ham-bpoliakoff.log ham-darxus.log ham-dwarren.log ham-grenier.log ham-jarif.log ham-kam-ninja.log ham-kpg-core.log ham-mas-cps.log ham-mas-mas.log ham-mmiroslaw-ham.log ham-mmiroslaw-spam.log ham-zmi.log spam-axb-8mile.log spam-axb-coi-bulk.log spam-axb-generic.log spam-axb-ham-misc.log spam-bernie-fsf.log spam-bernie-it_batt.log spam-bernie-mix.log 
spam-bpoliakoff.log spam-darxus.log spam-dwarren.log spam-grenier.log spam-jarif.log spam-kam-ninja.log spam-kpg-core.log spam-mas-cps.log spam-mas-mas.log spam-mmiroslaw-ham.log spam-mmiroslaw-spam.log spam-zmi.log +# Using score set 0 logs for revision 1786853 from: +# ham-axb-coi-bulk.log ham-axb-generic.log ham-axb-ham-misc.log ham-axb-ninja.log ham-darxus.log ham-ena.log ham-grenier.log ham-jarif.log ham-kgolding.log ham-thendrikx.log ham-zmi.log spam-axb-coi-bulk.log spam-axb-generic.log spam-axb-ham-misc.log spam-axb-ninja.log spam-darxus.log spam-ena.log spam-grenier.log spam-jarif.log spam-kgolding.log spam-thendrikx.log spam-zmi.log score AC_BR_BONANZA 0.001 -score AC_DIV_BONANZA 0.001 # force non-zero -score AC_SPAMMY_URI_PATTERNS10 3.999 -score ADMITS_SPAM 2.899 -score ADVANCE_FEE_2_NEW_FRM_MNY 0.001 -score ADVANCE_FEE_2_NEW_MONEY 1.999 -score ADVANCE_FEE_3_NEW 3.499 -score ADVANCE_FEE_3_NEW_FRM_MNY 0.249 -score ADVANCE_FEE_3_NEW_MONEY 3.499 -score ADVANCE_FEE_4_NEW 2.699 -score ADVANCE_FEE_4_NEW_FRM_MNY 1.889 -score ADVANCE_FEE_4_NEW_MONEY 1.702 -score ADVANCE_FEE_5_NEW 3.099 -score ADVANCE_FEE_5_NEW_FORM 1.581 -score ADVANCE_FEE_5_NEW_FRM_MNY 1.485 -score ADVANCE_FEE_5_NEW_MONEY 3.899 -score AD_PREFS 0.001 -score AXB_XMAILER_MIMEOLE_OL_024C2 1.253 -score AXB_XMAILER_MIMEOLE_OL_1ECD5 2.189 -score AXB_XM_FORGED_OL2600 2.152 -score AXB_X_FF_SEZ_S 2.499 -score BIGNUM_EMAILS 2.801 -score BODY_URI_ONLY 0.999 -score CK_HELO_DYNAMIC_SPLIT_IP 1.499 +score AC_DIV_BONANZA 0.001 +score ADVANCE_FEE_2_NEW_MONEY 1.997 +score ADVANCE_FEE_3_NEW 3.496 +score ADVANCE_FEE_3_NEW_MONEY 2.796 +score ADVANCE_FEE_4_NEW 2.596 +score ADVANCE_FEE_5_NEW 2.996 +score AXB_XMAILER_MIMEOLE_OL_024C2 0.367 +score AXB_XMAILER_MIMEOLE_OL_1ECD5 0.181 +score AXB_XM_FORGED_OL2600 1.190 +score BODY_EMPTY 1.997 +score BODY_URI_ONLY 0.998 +score BOGUS_MSM_HDRS 0.909 +score CANT_SEE_AD 2.996 +score CK_HELO_DYNAMIC_SPLIT_IP 1.350 score CK_HELO_GENERIC 0.249 -score COMMENT_GIBBERISH 1.499 -score 
COMPENSATION 1.499 -score DEAR_BENEFICIARY 3.041 -score DOS_OUTLOOK_TO_MX_IMAGE 2.400 -score DSN_NO_MIMEVERSION 1.999 -score DX_TEXT_05 2.071 -score FBI_MONEY 0.957 +score COMMENT_GIBBERISH 1.498 +score DATE_IN_FUTURE_96_Q 3.296 +score FBI_MONEY 0.696 score FBI_SPOOF 1.999 -score FILL_THIS_FORM 0.001 -score FORM_FRAUD 0.999 -score FORM_FRAUD_3 2.398 -score FORM_FRAUD_5 2.948 -score FOUND_YOU 3.250 -score FREEMAIL_DOC_PDF_BCC 2.599 -score FREEMAIL_FORGED_FROMDOMAIN 0.250 +score FILL_THIS_FORM 2.748 +score FORM_FRAUD 0.998 +score FORM_FRAUD_3 2.696 +score FORM_FRAUD_5 0.209 +score FOUND_YOU 3.013 +score FREEMAIL_DOC_PDF_BCC 2.596 +score FREEMAIL_FORGED_FROMDOMAIN 0.001 +score FROM_IN_TO_AND_SUBJ 0.287 score FROM_MISSPACED 0.001 -score FROM_MISSP_DYNIP 1.826 -score FROM_MISSP_EH_MATCH 0.001 -score FROM_MISSP_FREEMAIL 3.999 -score FROM_MISSP_MSFT 0.001 -score FROM_MISSP_PHISH 3.089 -score FROM_MISSP_REPLYTO 2.600 -score FROM_MISSP_TO_UNDISC 0.001 # force non-zero +score FROM_MISSP_FREEMAIL 3.595 +score FROM_MISSP_MSFT 0.001 # force non-zero +score FROM_MISSP_REPLYTO 0.001 +score FROM_MISSP_TO_UNDISC 1.438 score FROM_MISSP_USER 0.001 score FROM_MISSP_XPRIO 0.001 -score FSL_BOTSPAM_1 2.412 +score FROM_WORDY 2.497 score FSL_CTYPE_WIN1251 0.001 -score FSL_HELO_BARE_IP_2 1.999 -score FSL_HELO_FAKE 2.908 -score FSL_NEW_HELO_USER 1.999 -score FUZZY_CLICK_HERE 2.499 -score FUZZY_DR_OZ 2.599 +score FSL_HELO_BARE_IP_2 1.498 +score FSL_NEW_HELO_USER 0.083 +score HDRS_LCASE 0.099 score HEADER_FROM_DIFFERENT_DOMAINS 0.001 # force non-zero -score HEXHASH_WORD 2.600 -score HK_LOTTO 0.511 -score HK_LOTTO_NAME 1.000 -score HK_NAME_FM_MR_MRS 1.499 -score HK_NAME_MR_MRS 0.999 -score HK_RANDOM_FROM 1.000 -score HK_RANDOM_REPLYTO 1.000 -score HK_SCAM_N1 2.599 -score HK_SCAM_N13 1.306 -score HK_SCAM_N2 3.499 -score HK_SCAM_N3 2.246 -score HK_SCAM_N8 0.808 -score HK_SPAMMY_FILENAME 2.499 -score LONG_HEX_URI 2.999 +score HELO_MISC_IP 0.248 +score HK_RANDOM_FROM 0.998 +score HK_SCAM_N15 1.935 
+score HK_SCAM_N2 3.249 +score IMG_DIRECT_TO_MX 2.397 +score LIST_PRTL_SAME_USER 0.001 +score LONG_HEX_URI 2.194 +score LONG_IMG_URI 0.553 score LOTS_OF_MONEY 0.001 # force non-zero -score LOTTO_AGENT 2.499 -score LOTTO_DEPT 0.728 -score MAILER_EQ_ORG 2.499 -score MALFORMED_FREEMAIL 2.546 -score MANY_SPAN_IN_TEXT 2.299 -score MIME_NO_TEXT 2.000 -score MONEY_ATM_CARD 2.690 -score MONEY_BARRISTER 0.999 -score MONEY_FORM 0.552 -score MONEY_FORM_SHORT 2.481 -score MONEY_FRAUD_3 2.718 -score MONEY_FRAUD_5 1.862 -score MONEY_FRAUD_8 2.801 -score MONEY_FROM_41 1.999 -score MONEY_FROM_MISSP 0.001 -score NAME_EMAIL_DIFF 3.899 -score NSL_RCVD_FROM_USER 0.001 -score NSL_RCVD_HELO_USER 0.001 -score OBFU_ATTACH_MISSP 2.227 -score OBFU_TEXT_ATTACH 2.075 -score PDS_FROM_2_EMAILS 4.299 -score PHP_NOVER_MUA 3.499 -score PP_MIME_FAKE_ASCII_TEXT 0.999 -score RCVD_DBL_DQ 2.599 -score RISK_FREE 2.601 -score SERGIO_SUBJECT_PORN014 2.602 -score SERGIO_SUBJECT_VIAGRA01 2.699 -score SHARE_50_50 1.932 -score SHORTENED_URL_SRC 0.814 -score SINGLETS_LOW_CONTRAST 2.086 -score STYLE_GIBBERISH 2.801 -score TAB_IN_FROM 0.499 -score THIS_AD 2.002 -score TO_EQ_FM_DIRECT_MX 3.050 +score LOTTO_AGENT 1.498 +score LOTTO_DEPT 0.001 +score MIMEOLE_DIRECT_TO_MX 1.445 +score MONEY_FRAUD_3 2.896 +score MONEY_FRAUD_5 3.096 +score MONEY_FRAUD_8 2.548 +score MONEY_LOTTERY 2.498 +score MSGID_NOFQDN1 2.395 +score MSM_PRIO_REPTO 2.497 +score NSL_RCVD_FROM_USER 0.548 +score NSL_RCVD_HELO_USER 1.273 +score PHP_ORIG_SCRIPT 0.502 +score PP_MIME_FAKE_ASCII_TEXT 0.429 +score RP_MATCHES_RCVD -1.050 +score SHARE_50_50 2.121 +score SPOOFED_FREEM_REPTO 2.498 +score STATIC_XPRIO_OLE 1.997 +score STOCK_LOW_CONTRAST 2.030 +score STYLE_GIBBERISH 2.800 +score THIS_AD 0.596 +score TO_EQ_FM_DIRECT_MX 2.497 score TO_IN_SUBJ 0.099 -score TO_NO_BRKTS_FROM_MSSP 2.499 -score TO_NO_BRKTS_HTML_IMG 1.911 -score TO_NO_BRKTS_HTML_ONLY 1.800 -score TO_NO_BRKTS_MSFT 2.499 -score TO_NO_BRKTS_NORDNS_HTML 0.001 -score TO_NO_BRKTS_PCNT 2.500 
-score TVD_PH_BODY_META 0.884 -score TVD_SPACE_ENCODED 2.499 -score TVD_SPACE_ENC_FM_MIME 1.999 -score TVD_SPACE_RATIO_MINFP 2.749 -score URI_DQ_UNSUB 2.499 -score URI_GOOGLE_PROXY 2.299 -score URI_ONLY_MSGID_MALF 0.403 -score URI_OPTOUT_3LD 2.000 -score URI_OPTOUT_USME 2.999 -score URI_PHISH 1.400 -score URI_TRY_3LD 0.001 -score URI_TRY_USME 2.999 -score URI_WPADMIN 2.599 -score URI_WP_DIRINDEX 2.873 -score URI_WP_HACKED 2.999 -score URI_WP_HACKED_2 1.999 -score XM_PHPMAILER_FORGED 2.399 -score XPRIO 0.999 -score YOU_INHERIT 2.750 -score ACCT_PHISHING 1.000 +score TO_NO_BRKTS_FROM_MSSP 0.001 +score TO_NO_BRKTS_HTML_IMG 0.001 +score TO_NO_BRKTS_HTML_ONLY 1.997 +score TO_NO_BRKTS_MSFT 2.497 +score TO_NO_BRKTS_NORDNS_HTML 0.398 +score TO_NO_BRKTS_PCNT 2.497 +score TVD_SPACE_ENCODED 2.497 +score TVD_SPACE_ENC_FM_MIME 1.997 +score TVD_SPACE_RATIO_MINFP 2.497 +score URI_GOOGLE_PROXY 0.710 +score URI_ONLY_MSGID_MALF 0.001 # force non-zero +score URI_PHISH 3.995 +score URI_TRY_3LD 0.195 +score URI_WPADMIN 3.396 +score URI_WP_HACKED 2.996 +score URI_WP_HACKED_2 1.187 +score XPRIO 2.248 score AC_HTML_NONSENSE_TAGS 1.000 score AC_SPAMMY_URI_PATTERNS1 1.000 +score AC_SPAMMY_URI_PATTERNS10 1.000 score AC_SPAMMY_URI_PATTERNS11 1.000 score AC_SPAMMY_URI_PATTERNS12 1.000 score AC_SPAMMY_URI_PATTERNS2 1.000 @@ -145,26 +101,33 @@ score AC_SPAMMY_URI_PATTERNS4 1.000 score AC_SPAMMY_URI_PATTERNS8 1.000 score AC_SPAMMY_URI_PATTERNS9 1.000 score ADVANCE_FEE_2_NEW_FORM 1.000 -score CANT_SEE_AD 1.000 +score ENCRYPTED_MESSAGE -1.000 score FORM_LOW_CONTRAST 1.000 +score FROM_WORDY_SHORT 1.000 score GOOGLE_DOCS_PHISH 1.000 score GOOGLE_DOCS_PHISH_MANY 1.000 score GOOG_MALWARE_DNLD 1.000 -score HK_NAME_FROM 1.000 +score HEXHASH_WORD 1.000 score HTML_OFF_PAGE 1.000 score LIST_PRTL_PUMPDUMP 1.000 -score LIST_PRTL_SAME_USER 1.000 -score LONG_IMG_URI 1.000 score LUCRATIVE 1.000 +score MIME_NO_TEXT 1.000 +score PHP_NOVER_MUA 1.000 +score PHP_SCRIPT_MUA 1.000 score PP_TOO_MUCH_UNICODE02 0.500 
score PP_TOO_MUCH_UNICODE05 1.000 score PUMPDUMP 1.000 score PUMPDUMP_MULTI 1.000 score RAND_HEADER_MANY 1.000 -score STOCK_LOW_CONTRAST 1.000 +score SPOOFED_FREEM_REPTO_CHN 1.000 +score STOCK_TIP 1.000 score SYSADMIN 1.000 score TW_GIBBERISH_MANY 1.000 score UC_GIBBERISH_OBFU 1.000 +score URI_DATA 1.000 +score URI_OPTOUT_3LD 1.000 +score URI_WP_DIRINDEX 1.000 +score XPRIO_SHORT_SUBJ 1.000 # in active.list but have no hits in recent corpus score FROM_MISSP_SPF_FAIL 0.001 # force non-zero score RCVD_IN_MSPIKE_BL 0.001 # force non-zero @@ -178,5 +141,6 @@ score RCVD_IN_MSPIKE_L4 0.001 # force non-zero score RCVD_IN_MSPIKE_L5 0.001 # force non-zero score RCVD_IN_MSPIKE_WL 0.001 # force non-zero score RCVD_IN_MSPIKE_ZBI 0.001 # force non-zero +score SURBL_BLOCKED 0.001 # force non-zero score TO_EQ_FM_DOM_SPF_FAIL 0.001 # force non-zero score TO_EQ_FM_SPF_FAIL 0.001 # force non-zero diff --git rulesrc/scores/scores-set3 rulesrc/scores/scores-set3 index 14c21518a..862b74f9b 100644 --- rulesrc/scores/scores-set3 +++ rulesrc/scores/scores-set3 @@ -1,133 +1,104 @@ -# Using score set 1 logs for revision 1674464 from: -# ham-net-axb-8mile.log ham-net-axb-coi-bulk.log ham-net-axb-generic.log ham-net-axb-ham-misc.log ham-net-bernie-fsf.log ham-net-bernie-it_batt.log ham-net-bernie-mix.log ham-net-bpoliakoff.log ham-net-darxus.log ham-net-dwarren.log ham-net-grenier.log ham-net-jarif.log ham-net-kam-ninja.log ham-net-kpg-core.log ham-net-mas-cps.log ham-net-mas-mas.log ham-net-zmi.log spam-net-axb-8mile.log spam-net-axb-coi-bulk.log spam-net-axb-generic.log spam-net-axb-ham-misc.log spam-net-bernie-fsf.log spam-net-bernie-it_batt.log spam-net-bernie-mix.log spam-net-bpoliakoff.log spam-net-darxus.log spam-net-dwarren.log spam-net-grenier.log spam-net-jarif.log spam-net-kam-ninja.log spam-net-kpg-core.log spam-net-mas-cps.log spam-net-mas-mas.log spam-net-zmi.log +# Using score set 1 logs for revision 1786494 from: +# ham-net-axb-coi-bulk.log ham-net-axb-generic.log 
ham-net-axb-ham-misc.log ham-net-axb-ninja.log ham-net-bb-guenther_fraud.log ham-net-darxus.log ham-net-dwarren.log ham-net-ena.log ham-net-grenier.log ham-net-jarif.log ham-net-kgolding.log ham-net-thendrikx.log ham-net-zmi.log spam-net-axb-coi-bulk.log spam-net-axb-generic.log spam-net-axb-ham-misc.log spam-net-axb-ninja.log spam-net-bb-guenther_fraud.log spam-net-darxus.log spam-net-dwarren.log spam-net-ena.log spam-net-grenier.log spam-net-jarif.log spam-net-kgolding.log spam-net-thendrikx.log spam-net-zmi.log -score AC_BR_BONANZA 0.001 -score AC_DIV_BONANZA 0.001 # force non-zero -score AC_SPAMMY_URI_PATTERNS10 3.461 -score ADMITS_SPAM 0.001 # force non-zero -score ADVANCE_FEE_2_NEW_FRM_MNY 0.001 # force non-zero -score ADVANCE_FEE_2_NEW_MONEY 1.999 -score ADVANCE_FEE_3_NEW 2.496 -score ADVANCE_FEE_3_NEW_FRM_MNY 2.256 -score ADVANCE_FEE_3_NEW_MONEY 3.599 -score ADVANCE_FEE_4_NEW 2.699 -score ADVANCE_FEE_4_NEW_FRM_MNY 1.815 -score ADVANCE_FEE_4_NEW_MONEY 1.789 -score ADVANCE_FEE_5_NEW 2.999 -score ADVANCE_FEE_5_NEW_FRM_MNY 1.802 -score ADVANCE_FEE_5_NEW_MONEY 3.699 -score AXB_XMAILER_MIMEOLE_OL_024C2 0.001 # force non-zero -score AXB_XMAILER_MIMEOLE_OL_1ECD5 1.191 -score BIGNUM_EMAILS 2.999 -score BODY_URI_ONLY 0.673 -score CK_HELO_DYNAMIC_SPLIT_IP 1.499 -score CK_HELO_GENERIC 0.250 +score AC_BR_BONANZA 0.001 # force non-zero +score AC_DIV_BONANZA 0.001 +score AC_HTML_NONSENSE_TAGS 0.001 +score ADVANCE_FEE_2_NEW_MONEY 0.001 +score ADVANCE_FEE_3_NEW 0.001 # force non-zero +score ADVANCE_FEE_3_NEW_MONEY 0.001 +score ADVANCE_FEE_4_NEW 0.789 +score ADVANCE_FEE_5_NEW 0.001 # force non-zero +score AXB_XMAILER_MIMEOLE_OL_024C2 0.001 +score AXB_XMAILER_MIMEOLE_OL_1ECD5 0.001 +score AXB_XM_FORGED_OL2600 2.699 +score BODY_EMPTY 1.999 +score BODY_URI_ONLY 0.001 # force non-zero +score BOGUS_MSM_HDRS 0.001 +score CANT_SEE_AD 0.500 +score CK_HELO_DYNAMIC_SPLIT_IP 0.001 +score CK_HELO_GENERIC 0.249 +score CN_B2B_SPAMMER 0.001 score COMMENT_GIBBERISH 1.499 -score COMPENSATION 
1.499 -score DEAR_BENEFICIARY 3.499 -score DSN_NO_MIMEVERSION 1.999 -score DX_TEXT_05 1.218 -score FBI_MONEY 1.999 -score FBI_SPOOF 0.001 +score DATE_IN_FUTURE_96_Q 3.299 +score FBI_MONEY 0.001 +score FBI_SPOOF 1.999 score FILL_THIS_FORM 0.001 -score FORM_FRAUD 0.001 # force non-zero -score FORM_FRAUD_3 1.846 +score FORM_FRAUD 0.001 +score FORM_FRAUD_3 0.001 score FORM_FRAUD_5 0.001 -score FOUND_YOU 3.249 +score FOUND_YOU 0.001 score FREEMAIL_DOC_PDF_BCC 2.599 -score FREEMAIL_FORGED_FROMDOMAIN 0.249 +score FREEMAIL_FORGED_FROMDOMAIN 0.199 +score FROM_IN_TO_AND_SUBJ 0.262 score FROM_MISSPACED 0.001 -score FROM_MISSP_DYNIP 1.651 -score FROM_MISSP_EH_MATCH 0.326 score FROM_MISSP_FREEMAIL 0.001 -score FROM_MISSP_MSFT 0.001 -score FROM_MISSP_PHISH 3.338 -score FROM_MISSP_REPLYTO 0.001 # force non-zero +score FROM_MISSP_MSFT 0.001 # force non-zero +score FROM_MISSP_REPLYTO 0.001 score FROM_MISSP_TO_UNDISC 0.001 score FROM_MISSP_USER 0.001 score FROM_MISSP_XPRIO 0.001 -score FSL_BOTSPAM_1 0.589 +score FROM_WORDY 0.001 score FSL_CTYPE_WIN1251 0.001 -score FSL_HELO_BARE_IP_2 1.999 -score FSL_HELO_FAKE 1.390 -score FSL_NEW_HELO_USER 0.631 -score FUZZY_DR_OZ 2.699 +score FSL_HELO_BARE_IP_2 1.499 +score FSL_NEW_HELO_USER 0.001 +score HDRS_LCASE 0.100 score HEADER_FROM_DIFFERENT_DOMAINS 0.001 # force non-zero -score HK_LOTTO 0.770 -score HK_LOTTO_NAME 0.999 -score HK_NAME_FM_MR_MRS 1.459 -score HK_NAME_FROM 0.985 -score HK_NAME_MR_MRS 0.001 # force non-zero -score HK_RANDOM_FROM 1.000 -score HK_RANDOM_REPLYTO 0.999 -score HK_SCAM_N1 2.699 -score HK_SCAM_N13 2.599 -score HK_SCAM_N2 2.516 -score HK_SCAM_N3 1.116 -score HK_SCAM_N8 1.461 -score LONG_HEX_URI 2.737 +score HELO_MISC_IP 0.250 +score HK_RANDOM_FROM 0.001 # force non-zero +score HK_SCAM_N15 2.499 +score HK_SCAM_N2 0.001 # force non-zero +score IMG_DIRECT_TO_MX 2.400 +score LIST_PRTL_SAME_USER 0.286 +score LONG_HEX_URI 2.290 +score LONG_IMG_URI 0.100 score LOTS_OF_MONEY 0.001 # force non-zero -score LOTTO_AGENT 2.499 
-score LOTTO_DEPT 0.001 # force non-zero -score MALFORMED_FREEMAIL 1.429 -score MONEY_ATM_CARD 1.476 -score MONEY_BARRISTER 0.999 -score MONEY_FORM 0.001 -score MONEY_FORM_SHORT 1.260 -score MONEY_FRAUD_3 3.099 +score LOTTO_AGENT 1.499 +score LOTTO_DEPT 0.001 +score MIMEOLE_DIRECT_TO_MX 0.381 +score MONEY_FRAUD_3 0.001 score MONEY_FRAUD_5 0.001 -score MONEY_FRAUD_8 0.504 -score MONEY_FROM_41 0.686 -score MONEY_FROM_MISSP 0.001 -score NAME_EMAIL_DIFF 0.374 -score NSL_RCVD_FROM_USER 0.020 -score NSL_RCVD_HELO_USER 2.523 -score PDS_FROM_2_EMAILS 0.675 -score PP_MIME_FAKE_ASCII_TEXT 0.243 -score RCVD_DBL_DQ 2.699 -score RCVD_IN_MSPIKE_H2 -0.211 -score RCVD_IN_MSPIKE_L3 1.606 -score RCVD_IN_MSPIKE_L4 2.088 -score RCVD_IN_MSPIKE_L5 2.566 -score RCVD_IN_MSPIKE_ZBI 0.001 # force non-zero -score RISK_FREE 3.058 -score SERGIO_SUBJECT_PORN014 0.064 -score SERGIO_SUBJECT_VIAGRA01 2.242 -score SHORTENED_URL_SRC 0.368 -score SINGLETS_LOW_CONTRAST 1.703 -score STYLE_GIBBERISH 3.499 -score TAB_IN_FROM 0.499 -score THIS_AD 1.673 -score TO_EQ_FM_DIRECT_MX 0.449 -score TO_EQ_FM_DOM_SPF_FAIL 0.001 # force non-zero -score TO_EQ_FM_SPF_FAIL 0.001 -score TO_IN_SUBJ 0.100 -score TO_NO_BRKTS_FROM_MSSP 0.699 -score TO_NO_BRKTS_HTML_IMG 1.483 -score TO_NO_BRKTS_HTML_ONLY 0.591 -score TO_NO_BRKTS_MSFT 2.500 +score MONEY_FRAUD_8 0.001 # force non-zero +score MONEY_LOTTERY 1.611 +score MSGID_NOFQDN1 3.299 +score MSM_PRIO_REPTO 0.180 +score NSL_RCVD_FROM_USER 0.001 +score NSL_RCVD_HELO_USER 0.001 +score PHP_ORIG_SCRIPT 2.499 +score PP_MIME_FAKE_ASCII_TEXT 0.001 +score RCVD_IN_MSPIKE_H2 -2.800 +score RCVD_IN_MSPIKE_L3 0.001 # force non-zero +score RCVD_IN_MSPIKE_L4 0.001 +score RCVD_IN_MSPIKE_L5 0.001 +score RP_MATCHES_RCVD -0.001 +score SHARE_50_50 1.818 +score SPOOFED_FREEM_REPTO 1.368 +score STATIC_XPRIO_OLE 0.001 +score STOCK_LOW_CONTRAST 2.347 +score STYLE_GIBBERISH 3.093 +score THIS_AD 2.200 +score TO_EQ_FM_DIRECT_MX 0.622 +score TO_IN_SUBJ 0.099 +score TO_NO_BRKTS_FROM_MSSP 0.001 +score 
TO_NO_BRKTS_HTML_IMG 2.000 +score TO_NO_BRKTS_HTML_ONLY 0.001 +score TO_NO_BRKTS_MSFT 0.001 score TO_NO_BRKTS_NORDNS_HTML 0.001 -score TO_NO_BRKTS_PCNT 1.137 -score TVD_PH_BODY_META 1.984 -score TVD_SPACE_ENCODED 2.499 -score TVD_SPACE_ENC_FM_MIME 1.999 -score TVD_SPACE_RATIO_MINFP 2.749 -score URI_DQ_UNSUB 2.599 -score URI_ONLY_MSGID_MALF 2.002 -score URI_OPTOUT_3LD 0.001 # force non-zero -score URI_OPTOUT_USME 0.001 -score URI_PHISH 0.891 -score URI_TRY_3LD 0.001 -score URI_TRY_USME 0.001 -score URI_WPADMIN 2.699 -score URI_WP_DIRINDEX 3.000 -score URI_WP_HACKED 2.999 -score URI_WP_HACKED_2 1.999 -score XPRIO 0.999 -score YOU_INHERIT 2.900 -score ACCT_PHISHING 1.000 -score AC_HTML_NONSENSE_TAGS 1.000 +score TO_NO_BRKTS_PCNT 0.001 +score TVD_SPACE_ENCODED 0.001 # force non-zero +score TVD_SPACE_ENC_FM_MIME 0.001 +score TVD_SPACE_RATIO_MINFP 0.001 +score URI_GOOGLE_PROXY 1.378 +score URI_ONLY_MSGID_MALF 1.191 +score URI_PHISH 3.999 +score URI_TRY_3LD 0.001 # force non-zero +score URI_TRY_USME 0.001 # force non-zero +score URI_WPADMIN 3.014 +score URI_WP_HACKED 3.000 +score URI_WP_HACKED_2 1.764 +score XPRIO 2.249 score AC_SPAMMY_URI_PATTERNS1 1.000 +score AC_SPAMMY_URI_PATTERNS10 1.000 score AC_SPAMMY_URI_PATTERNS11 1.000 score AC_SPAMMY_URI_PATTERNS12 1.000 score AC_SPAMMY_URI_PATTERNS2 1.000 @@ -136,20 +107,20 @@ score AC_SPAMMY_URI_PATTERNS4 1.000 score AC_SPAMMY_URI_PATTERNS8 1.000 score AC_SPAMMY_URI_PATTERNS9 1.000 score ADVANCE_FEE_2_NEW_FORM 1.000 -score CANT_SEE_AD 1.000 +score ENCRYPTED_MESSAGE -1.000 score FORM_LOW_CONTRAST 1.000 score FROM_MISSP_SPF_FAIL 1.000 +score FROM_WORDY_SHORT 1.000 score GOOGLE_DOCS_PHISH 1.000 score GOOGLE_DOCS_PHISH_MANY 1.000 score GOOG_MALWARE_DNLD 1.000 score HEXHASH_WORD 1.000 score HTML_OFF_PAGE 1.000 score LIST_PRTL_PUMPDUMP 1.000 -score LIST_PRTL_SAME_USER 1.000 -score LONG_IMG_URI 1.000 score LUCRATIVE 1.000 score MIME_NO_TEXT 1.000 score PHP_NOVER_MUA 1.000 +score PHP_SCRIPT_MUA 1.000 score PP_TOO_MUCH_UNICODE02 0.500 
score PP_TOO_MUCH_UNICODE05 1.000 score PUMPDUMP 1.000 @@ -160,7 +131,12 @@ score RCVD_IN_MSPIKE_H3 -0.010 score RCVD_IN_MSPIKE_H4 -0.010 score RCVD_IN_MSPIKE_H5 -1.000 score RCVD_IN_MSPIKE_WL -0.010 -score STOCK_LOW_CONTRAST 1.000 +score SPOOFED_FREEM_REPTO_CHN 1.000 +score STOCK_TIP 1.000 score SYSADMIN 1.000 score TW_GIBBERISH_MANY 1.000 score UC_GIBBERISH_OBFU 1.000 +score URI_DATA 1.000 +score URI_OPTOUT_3LD 1.000 +score URI_WP_DIRINDEX 1.000 +score XPRIO_SHORT_SUBJ 1.000 diff --git rulesrc/scores/stats-set0 rulesrc/scores/stats-set0 index bbd8d292f..4d9689675 100644 --- rulesrc/scores/stats-set0 +++ rulesrc/scores/stats-set0 @@ -1,40 +1,40 @@ ##### WITH NEW RULES AND SCORES ##### # SUMMARY for threshold 5.0: -# Correctly non-spam: 200989 53.711% (96.903% of non-spam corpus) -# Correctly spam: 67586 18.061% (40.521% of spam corpus) -# False positives: 6423 1.716% (3.097% of nonspam, 154739 weighted) -# False negatives: 99205 26.511% (59.479% of spam, 173889 weighted) -# Average score for spam: 5.1 nonspam: 1.3 -# Average for false-pos: 6.0 false-neg: 1.8 -# TOTAL: 374203 100.00% +# Correctly non-spam: 124345 32.233% (97.470% of non-spam corpus) +# Correctly spam: 201122 52.136% (77.896% of spam corpus) +# False positives: 3228 0.837% (2.530% of nonspam, 191016 weighted) +# False negatives: 57071 14.794% (22.104% of spam, 176894 weighted) +# Average score for spam: 9.4 nonspam: 0.9 +# Average for false-pos: 5.8 false-neg: 3.1 +# TOTAL: 385766 100.00% Reading scores from "tmprules"... Reading per-message hit stat logs and scores... 
# SUMMARY for threshold 5.0: -# Correctly non-spam: 25197 97.01% -# Correctly spam: 8473 40.34% -# False positives: 776 2.99% -# False negatives: 12532 59.66% -# TCR(l=50): 0.409199 SpamRecall: 40.338% SpamPrec: 91.610% +# Correctly non-spam: 15547 97.08% +# Correctly spam: 25233 78.21% +# False positives: 468 2.92% +# False negatives: 7030 21.79% +# TCR(l=50): 1.060237 SpamRecall: 78.210% SpamPrec: 98.179% ##### WITHOUT NEW RULES AND SCORES ##### Reading scores from "../rules-base"... Reading per-message hit stat logs and scores... # SUMMARY for threshold 5.0: -# Correctly non-spam: 201285 97.05% -# Correctly spam: 39797 23.86% -# False positives: 6127 2.95% -# False negatives: 126994 76.14% -# TCR(l=50): 0.384893 SpamRecall: 23.860% SpamPrec: 86.658% +# Correctly non-spam: 124043 97.23% +# Correctly spam: 133387 51.66% +# False positives: 3530 2.77% +# False negatives: 124806 48.34% +# TCR(l=50): 0.856913 SpamRecall: 51.662% SpamPrec: 97.422% Reading scores from "../rules-base"... Reading per-message hit stat logs and scores... 
# SUMMARY for threshold 5.0: -# Correctly non-spam: 25260 97.25% -# Correctly spam: 5031 23.95% -# False positives: 713 2.75% -# False negatives: 15974 76.05% -# TCR(l=50): 0.406884 SpamRecall: 23.951% SpamPrec: 87.587% +# Correctly non-spam: 15514 96.87% +# Correctly spam: 16705 51.78% +# False positives: 501 3.13% +# False negatives: 15558 48.22% +# TCR(l=50): 0.794499 SpamRecall: 51.778% SpamPrec: 97.088% diff --git rulesrc/scores/stats-set1 rulesrc/scores/stats-set1 index de4832316..5af29de20 100644 --- rulesrc/scores/stats-set1 +++ rulesrc/scores/stats-set1 @@ -1,40 +1,40 @@ ##### WITH NEW RULES AND SCORES ##### # SUMMARY for threshold 5.0: -# Correctly non-spam: 202582 26.176% (99.677% of non-spam corpus) -# Correctly spam: 186188 24.058% (32.625% of spam corpus) -# False positives: 656 0.085% (0.323% of nonspam, 110792 weighted) -# False negatives: 384499 49.682% (67.375% of spam, 644384 weighted) -# Average score for spam: 4.5 nonspam: -0.0 -# Average for false-pos: 6.0 false-neg: 1.7 -# TOTAL: 773925 100.00% +# Correctly non-spam: 131456 34.275% (96.452% of non-spam corpus) +# Correctly spam: 207885 54.202% (84.082% of spam corpus) +# False positives: 4836 1.261% (3.548% of nonspam, 888189 weighted) +# False negatives: 39357 10.262% (15.918% of spam, 96144 weighted) +# Average score for spam: 10.5 nonspam: 0.2 +# Average for false-pos: 10.1 false-neg: 2.4 +# TOTAL: 383534 100.00% Reading scores from "tmprules"... Reading per-message hit stat logs and scores... # SUMMARY for threshold 5.0: -# Correctly non-spam: 25358 99.74% -# Correctly spam: 23230 32.61% -# False positives: 66 0.26% -# False negatives: 47998 67.39% -# TCR(l=50): 1.388514 SpamRecall: 32.614% SpamPrec: 99.717% +# Correctly non-spam: 16421 96.24% +# Correctly spam: 25947 84.09% +# False positives: 642 3.76% +# False negatives: 4909 15.91% +# TCR(l=50): 0.833743 SpamRecall: 84.091% SpamPrec: 97.585% ##### WITHOUT NEW RULES AND SCORES ##### Reading scores from "../rules-base"... 
Reading per-message hit stat logs and scores... # SUMMARY for threshold 5.0: -# Correctly non-spam: 202873 99.82% -# Correctly spam: 121683 21.32% -# False positives: 365 0.18% -# False negatives: 449004 78.68% -# TCR(l=50): 1.221364 SpamRecall: 21.322% SpamPrec: 99.701% +# Correctly non-spam: 131787 96.69% +# Correctly spam: 147796 59.78% +# False positives: 4505 3.31% +# False negatives: 99446 40.22% +# TCR(l=50): 0.761457 SpamRecall: 59.778% SpamPrec: 97.042% Reading scores from "../rules-base"... Reading per-message hit stat logs and scores... # SUMMARY for threshold 5.0: -# Correctly non-spam: 25385 99.85% -# Correctly spam: 15181 21.31% -# False positives: 39 0.15% -# False negatives: 56047 78.69% -# TCR(l=50): 1.228132 SpamRecall: 21.313% SpamPrec: 99.744% +# Correctly non-spam: 16467 96.51% +# Correctly spam: 18588 60.24% +# False positives: 596 3.49% +# False negatives: 12268 39.76% +# TCR(l=50): 0.733479 SpamRecall: 60.241% SpamPrec: 96.893% diff --git sa-check_spamd.raw sa-check_spamd.raw index b77bb0797..b65ed0510 100755 --- sa-check_spamd.raw +++ sa-check_spamd.raw @@ -454,10 +454,12 @@ C version 3.1.1 or higher (3.1.6 or higher recommended) Daryl C. W. O'Shea, DOS Technologies -=head1 LICENSE +=head1 COPYRIGHT AND LICENSE sa-check_spamd is distributed under the Apache License, Version 2.0, as described in the file C included with the Apache SpamAssassin distribution and available at http://www.apache.org/licenses/LICENSE-2.0 +Copyright (C) 2015 The Apache Software Foundation + =cut diff --git sa-compile.raw sa-compile.raw index e5447a13c..840ecd0f3 100755 --- sa-compile.raw +++ sa-compile.raw @@ -806,10 +806,13 @@ See The Apache SpamAssassin(tm) Project -=head1 COPYRIGHT +=head1 LICENSE AND COPYRIGHT SpamAssassin is distributed under the Apache License, Version 2.0, as described in the file C included with the distribution. 
+Copyright (C) 2015 The Apache Software Foundation + + =cut diff --git sa-learn.raw sa-learn.raw index cd8198094..d9ef9e9e8 100755 --- sa-learn.raw +++ sa-learn.raw @@ -18,7 +18,7 @@ use strict; use warnings; -use bytes; +# use bytes; use Errno qw(EBADF); use Getopt::Long; diff --git sa-update.raw sa-update.raw index 2f548dd52..dc866ed27 100755 --- sa-update.raw +++ sa-update.raw @@ -740,6 +740,16 @@ foreach my $channel (@channels) { my $mirror_info = $mirrors{$mirror}; delete $mirrors{$mirror}; + if (!check_mirror_af($mirror)) { + my @my_af; + push(@my_af, "IPv4") if $have_inet4; + push(@my_af, "IPv6") if $have_inet6; + push(@my_af, "no IP service") if !@my_af; + dbg("reject mirror %s: no common address family (%s)", + $mirror, join(" ", @my_af)); + next; + } + dbg("channel: selected mirror $mirror"); # Actual archive file @@ -856,7 +866,7 @@ foreach my $channel (@channels) { } # use the longest match we can find - $signer = $key if (length $key > length $signer); + $signer = $key if length $key > length $signer; } my $errno = 0; close CMD or $errno = $!; @@ -1594,6 +1604,18 @@ sub choose_mirror { ############################################################################## +sub check_mirror_af { + my ($mirror) = @_; + my($a_rr, $aaaa_rr); + # RFC 3986: scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." 
) + $mirror =~ s{^[a-z][a-z0-9.+-]*://}{}si; # strip scheme like http:// + return 1 if $have_inet4 && do_dns_query($mirror, "A"); + return 1 if $have_inet6 && do_dns_query($mirror, "AAAA"); + return 0; +} + +############################################################################## + sub print_version { printf("sa-update version %s\n running on Perl version %s\n", $VERSION, join(".", map( 0+($_||0), ( $] =~ /(\d)\.(\d{3})(\d{3})?/ )))); @@ -2034,10 +2056,13 @@ See The Apache SpamAssassin(tm) Project -=head1 COPYRIGHT +=head1 LICENSE AND COPYRIGHT SpamAssassin is distributed under the Apache License, Version 2.0, as described in the file C included with the distribution. +Copyright (C) 2015 The Apache Software Foundation + + =cut diff --git spamassassin.raw spamassassin.raw index 9d03d4f85..93dee285f 100755 --- spamassassin.raw +++ spamassassin.raw @@ -476,7 +476,10 @@ sub wanted { # If we're not going to retest, just remove the markup and print it out if ( !$opt{'test-mode'} ) { - print $spamtest->remove_spamassassin_markup ($mail); + my $report = $spamtest->remove_spamassassin_markup ($mail); + # encode Unicode characters to UTF-8 octets + utf8::encode($report) if utf8::is_utf8($report); + print $report; $mail->finish(); $mail = undef; return 1; @@ -525,11 +528,17 @@ sub wanted { # OK, do checks and put out the message. my $status = $spamtest->check($mail); - print $status->rewrite_mail() or die "error writing: $!"; + { my $report = $status->rewrite_mail(); + # encode Unicode characters to UTF-8 octets + utf8::encode($report) if utf8::is_utf8($report); + print $report or die "error writing: $!"; + } if ( $opt{'test-mode'} ) { - use bytes; - print $status->get_report() or die "error writing: $!"; + my $report = $status->get_report(); + # encode Unicode characters to UTF-8 octets + utf8::encode($report) if utf8::is_utf8($report); + print $report or die "error writing: $!"; } # if this message was spam, set the exit value appropriately. 
@@ -889,8 +898,10 @@ See The SpamAssassin(tm) Project -=head1 COPYRIGHT +=head1 COPYRIGHT AND LICENSE SpamAssassin is distributed under the Apache License, Version 2.0, as described in the file C included with the distribution. +Copyright (C) 2015 The Apache Software Foundation + diff --git spamc/README.win spamc/README.win index c441aca55..121c65dfd 100644 --- spamc/README.win +++ spamc/README.win @@ -3,7 +3,11 @@ Building spamd/spamc under Windows Spamd currently does not run under Windows, but spamc does. -Spamd does work when built and run in the Cygwin unix emulation +With SpamAssassin 3.4.2, spamd has been made available for Windows. Note +that this is still experimental. In terms of stability and performance it +heavily depends on which Perl distribution and/or module versions you +are using. So do not use it in a production environment. Alternatively, +spamd also works when built and run in the Cygwin Unix emulation environment, and a pure Windows spamc can be used with it. You can also run a pure Windows spamc that communicates with spamd running on any network accessible computer. The unix sockets option is not supported @@ -27,8 +31,7 @@ suitable C compiler. To build SpamAssassin you must have installed a Windows version of Perl and the modules that are listed as required in the general SpamAssassin -documentation. So far the only Windows version of perl this has been -tested with is ActivePerl. +documentation. Building spamc for Windows has been tested with Microsoft Visual C++ 6.0 and with Microsoft Visual C++ Toolkit 2003. It will probably just work diff --git spamc/configure.pl spamc/configure.pl index 0f10f3010..c6f67f208 100755 --- spamc/configure.pl +++ spamc/configure.pl @@ -66,7 +66,7 @@ print join(' ', $Config{'perlpath'}, "version.h.pl") . "\n"; # Do the same thing as for the preprocessor below.
package version_h; my $Z = $0; - local $0 = "version.h.pl"; + local $0 = "./version.h.pl"; local @ARGV = (); # Got to check for defined because the script returns shell error level! unless (defined do $0) { diff --git spamc/libspamc.c spamc/libspamc.c index 44065e6d3..b042607a4 100644 --- spamc/libspamc.c +++ spamc/libspamc.c @@ -1187,7 +1187,7 @@ int message_filter(struct transport *tp, const char *username, unsigned int throwaway; SSL_CTX *ctx = NULL; SSL *ssl = NULL; - SSL_METHOD *meth; + const SSL_METHOD *meth; char zlib_on = 0; unsigned char *zlib_buf = NULL; int zlib_bufsiz = 0; @@ -1213,11 +1213,7 @@ int message_filter(struct transport *tp, const char *username, if (flags & SPAMC_USE_SSL) { #ifdef SPAMC_SSL SSLeay_add_ssl_algorithms(); - if (flags & SPAMC_TLSV1) { - meth = TLSv1_client_method(); - } else { - meth = SSLv3_client_method(); /* default */ - } + meth = SSLv23_client_method(); SSL_load_error_strings(); ctx = SSL_CTX_new(meth); #else @@ -1596,7 +1592,7 @@ int message_tell(struct transport *tp, const char *username, int flags, int failureval; SSL_CTX *ctx = NULL; SSL *ssl = NULL; - SSL_METHOD *meth; + const SSL_METHOD *meth; assert(tp != NULL); assert(m != NULL); @@ -1604,7 +1600,7 @@ int message_tell(struct transport *tp, const char *username, int flags, if (flags & SPAMC_USE_SSL) { #ifdef SPAMC_SSL SSLeay_add_ssl_algorithms(); - meth = SSLv3_client_method(); + meth = SSLv23_client_method(); SSL_load_error_strings(); ctx = SSL_CTX_new(meth); #else diff --git spamc/spamc.c spamc/spamc.c index 277a6a8da..38a7df548 100644 --- spamc/spamc.c +++ spamc/spamc.c @@ -368,16 +368,11 @@ read_args(int argc, char **argv, case 'S': { flags |= SPAMC_USE_SSL; - if (!spamc_optarg || (strcmp(spamc_optarg,"sslv3") == 0)) { - flags |= SPAMC_SSLV3; - } - else if (strcmp(spamc_optarg,"tlsv1") == 0) { - flags |= SPAMC_TLSV1; - } - else { - libspamc_log(flags, LOG_ERR, "Please specify a legal ssl version (%s)", spamc_optarg); - ret = EX_USAGE; - } + if(spamc_optarg) { + 
libspamc_log(flags, LOG_ERR, + "Explicit specification of an SSL/TLS version no longer supported."); + ret = EX_USAGE; + } break; } #endif diff --git spamc/spamc.pod spamc/spamc.pod index 02a79e016..93ee4ad8d 100644 --- spamc/spamc.pod +++ spamc/spamc.pod @@ -74,6 +74,18 @@ all addresses of one host before it tries the next one in the list. Note that this fail-over behaviour is incompatible with B<-x>; if that switch is used, fail-over will not occur. +=item B<-4> + +Use IPv4 only for connecting to server. Restricts domain name resolution of +spamd server host(s) to address family AF_INET through the C +flag in the call to getaddrinfo(3). + +=item B<-6> + +Use IPv6 only for connecting to server. Restricts domain name resolution of +spamd server host(s) to address family AF_INET6 through the C +flag in the call to getaddrinfo(3). + =item B<-e> I I<[args]>, B<--pipe-to> I I<[args]> Instead of writing to stdout, pipe the output to I's standard input. @@ -177,12 +189,10 @@ The default is 1 time (ie. one attempt and no retries). Sleep for I seconds between failed spamd filtering attempts. The default is 1 second. -=item B<-S>, B<--ssl>, B<--ssl>=I +=item B<-S>, B<--ssl> If spamc was built with support for SSL, encrypt data to and from the spamd process with SSL; spamd must support SSL as well. -I specifies the SSL protocol version to use, either -C, or C. The default, is C.
=item B<-t> I, B<--timeout>=I diff --git spamd-apache2/Makefile.PL spamd-apache2/Makefile.PL index 0aaa5b37b..018ca580f 100644 --- spamd-apache2/Makefile.PL +++ spamd-apache2/Makefile.PL @@ -20,7 +20,7 @@ WriteMakefile( VERSION => '0.03', NAME => 'Mail::SpamAssassin::Spamd::Apache2', ABSTRACT => 'mod_perl2 module implementing spamd in Apache2', - AUTHOR => 'Radoslaw Zielinski ', + AUTHOR => 'The Apache SpamAssassin Project ', EXE_FILES => [qw(bin/apache-spamd.pl)], PREREQ_PM => { 'mod_perl2' => 2, diff --git spamd-apache2/README.apache spamd-apache2/README.apache index c614869c3..5ecc75d9e 100644 --- spamd-apache2/README.apache +++ spamd-apache2/README.apache @@ -45,28 +45,20 @@ available separately on CPAN. BUGS + +See to report a bug. -For now, report to me directly or to the SpamAssassin dev list. +Please include perl, Apache and mod_perl versions. -Include perl, Apache and mod_perl versions. `httpd -V` shouldn't hurt -(unless you know it doesn't matter in your case). `httpd -l` might also -be handy, if you're reporting an apache-spamd.pl issue. Don't forget -relevant lines from logs/error_log. +`httpd -V` shouldn't hurt (unless you know it doesn't matter in your case). +`httpd -l` might also be handy, if you're reporting an apache-spamd.pl issue. +Don't forget relevant lines from logs/error_log. -Known bugs: worker (and other threading MPMs) probably will cause +Known bugs: + +Worker (and other threading MPMs) probably will cause problems. SA isn't really thread-safe; one example is using umask(). Some helpers like Razor / Pyzor / DCC probably do chdir(). I consider this a problem of SA, not this code. - -COPYRIGHT AND LICENCE - -Copyright (C) 2006 by Radosław Zieliński - -Based on spamd code, (C) by The SpamAssassin(tm) Project - -This library is free software; you can redistribute it and/or modify it -under the terms of the Apache License, Version 2.0. 
- - # vim: encoding=utf8 diff --git spamd-apache2/lib/Mail/SpamAssassin/Spamd.pm spamd-apache2/lib/Mail/SpamAssassin/Spamd.pm index 1a8f0a85e..aa532135c 100644 --- spamd-apache2/lib/Mail/SpamAssassin/Spamd.pm +++ spamd-apache2/lib/Mail/SpamAssassin/Spamd.pm @@ -47,6 +47,15 @@ Interface is likely to change. See the source code of L and L. +=head1 AUTHORS + +The SpamAssassin(tm) Project Ehttp://spamassassin.apache.org/E + +=head1 COPYRIGHT + +SpamAssassin is distributed under the Apache License, Version 2.0, as +described in the file C included with the distribution. + =head2 METHODS =over diff --git spamd-apache2/lib/Mail/SpamAssassin/Spamd/Apache2.pm spamd-apache2/lib/Mail/SpamAssassin/Spamd/Apache2.pm index 0af4831fb..0d3772174 100644 --- spamd-apache2/lib/Mail/SpamAssassin/Spamd/Apache2.pm +++ spamd-apache2/lib/Mail/SpamAssassin/Spamd/Apache2.pm @@ -17,6 +17,8 @@ use APR::Pool (); # cleanup_register use APR::SockAddr (); use APR::Socket (); use APR::Status (); +use Apache::Test; +use constant APACHE24 => have_min_apache_version('2.4.0'); eval { use Time::HiRes qw(time); }; @@ -159,8 +161,8 @@ sub out { $_[0]->{out} } # -: a sub _server { $_[0]->c->base_server } # -: a sub _remote_host { $_[0]->c->get_remote_host } # -: a -sub _remote_ip { $_[0]->c->remote_ip } # -: a -sub _remote_port { $_[0]->c->remote_addr->port } # -: a +sub _remote_ip { APACHE24 ? $_[0]->c->client_ip : $_[0]->c->remote_ip; } # -: a +sub _remote_port { APACHE24 ? 
$_[0]->c->client_addr->port : $_[0]->c->remote_addr->port } # -: a sub send_buffer { # -: A diff --git spamd-apache2/lib/Mail/SpamAssassin/Spamd/Apache2/AclIP.pm spamd-apache2/lib/Mail/SpamAssassin/Spamd/Apache2/AclIP.pm index 3b0783398..b5a3893f0 100644 --- spamd-apache2/lib/Mail/SpamAssassin/Spamd/Apache2/AclIP.pm +++ spamd-apache2/lib/Mail/SpamAssassin/Spamd/Apache2/AclIP.pm @@ -6,6 +6,9 @@ use Apache2::Const -compile => qw(OK FORBIDDEN SERVER_ERROR); use Apache2::Module (); use Apache2::ServerRec (); +use Apache::Test; +use constant APACHE24 => have_min_apache_version('2.4.0'); + use Mail::SpamAssassin::Logger; =head1 NAME @@ -61,14 +64,17 @@ sub handler { # my $ip = NetAddr::IP::Lite->new($c->remote_ip) # or return Apache2::Const::SERVER_ERROR; # log it, shouldn't happen - my $remote = $c->remote_addr; + #use Apache::Test have_min_apache_version to support MP under Apache 2.2 and 2.4 + my $remote = APACHE24 ? $c->client_addr : $c->remote_addr; + for my $allowed (@{ $srv_cfg->{allowed_networks} }) { # depends on allowed_ips format; TODO; if NetAddr::IP::Lite: # return Apache2::Const::OK if $allowed->contains($ip); return Apache2::Const::OK if $allowed->test($remote); } - info(sprintf "access denied for '%s'", $c->remote_ip); + info(sprintf "access denied for '%s'", APACHE24 ? $c->client_ip : $c->remote_ip); + return Apache2::Const::FORBIDDEN; } diff --git spamd-apache2/lib/Mail/SpamAssassin/Spamd/Apache2/AclRFC1413.pm spamd-apache2/lib/Mail/SpamAssassin/Spamd/Apache2/AclRFC1413.pm index b3645207c..5df72c464 100644 --- spamd-apache2/lib/Mail/SpamAssassin/Spamd/Apache2/AclRFC1413.pm +++ spamd-apache2/lib/Mail/SpamAssassin/Spamd/Apache2/AclRFC1413.pm @@ -7,6 +7,9 @@ use Apache2::RequestUtil (); # RequestRec->new use Apache2::RequestRec (); use Apache2::Access (); # $r->get_remote_logname +use Apache::Test; +use constant APACHE24 => have_min_apache_version('2.4.0'); + use APR::SockAddr (); # $c->remote_addr->... 
use APR::Table (); # $c->notes @@ -69,8 +72,8 @@ sub handler { unless (defined $remote_user && length $remote_user) { warn 'rfc1413 check: failed to obtain info for ' - . $c->remote_addr->ip_get() . ':' - . $c->remote_addr->port() . "\n"; + . (APACHE24 ? $c->client_addr->ip_get() : $c->remote_addr->ip_get()) . ':' + . (APACHE24 ? $c->client_addr->port() : $c->remote_addr->port()) . "\n"; return Apache2::Const::FORBIDDEN; } @@ -93,14 +96,14 @@ sub check_ident { my ($c, $user) = @_; my $remote_user = $c->notes->{remote_user}; die "rfc1413 check: no query result for user=$user ip=" - . $c->remote_addr->ip_get() + . (APACHE24 ? $c->client_addr->ip_get() : $c->remote_addr->ip_get()) . ' port=' - . $c->remote_addr->port() + . (APACHE24 ? $c->client_addr->port() : $c->remote_addr->port()) unless defined $remote_user && length $remote_user; return $remote_user if $user eq $remote_user; warn "ident mismatch for [$user] from " - . $c->remote_addr->ip_get() . ':' - . $c->remote_addr->port() + . (APACHE24 ? $c->client_addr->ip_get() : $c->remote_addr->ip_get()) . ':' + . (APACHE24 ? $c->client_addr->port() : $c->remote_addr->port()) . "; remote identd returned [$remote_user]\n"; 0; } diff --git spamd/spamd.raw spamd/spamd.raw index 821f65840..5f7d1f208 100755 --- spamd/spamd.raw +++ spamd/spamd.raw @@ -245,7 +245,7 @@ use Mail::SpamAssassin::NetSet; use Mail::SpamAssassin::SubProcBackChannel; use Mail::SpamAssassin::SpamdForkScaling qw(:pfstates); use Mail::SpamAssassin::Logger qw(:DEFAULT log_message); -use Mail::SpamAssassin::Util qw(untaint_var untaint_file_path +use Mail::SpamAssassin::Util qw(untaint_var untaint_file_path secure_tmpdir exit_status_str am_running_on_windows); use Mail::SpamAssassin::Timeout; @@ -409,12 +409,12 @@ GetOptions( 'sql-config!' 
=> \$opt{'sql-config'}, 'ssl' => \$opt{'ssl'}, 'ssl-port=s' => \$opt{'ssl-port'}, - 'ssl-version=s' => \$opt{'ssl-version'}, 'syslog-socket=s' => \$opt{'syslog-socket'}, 'syslog|s=s' => \$opt{'syslog'}, 'log-timestamp-fmt:s' => \$opt{'log-timestamp-fmt'}, 'timeout-tcp|T=i' => \$opt{'timeout-tcp'}, 'timeout-child|t=i' => \$opt{'timeout-child'}, + 'timing' => \$opt{'timing'}, 'user-config' => \$opt{'user-config'}, 'username|u=s' => \$opt{'username'}, 'version|V' => \$opt{'version'}, @@ -743,11 +743,6 @@ if ( defined $ENV{'HOME'} ) { # Do whitelist later in tmp dir. Side effect: this will be done as -u user. -my $sslversion = $opt{'ssl-version'} || 'sslv3'; -if ($sslversion !~ /^(?:sslv3|tlsv1)$/) { - die "spamd: invalid ssl-version: $opt{'ssl-version'}\n"; -} - $opt{'server-key'} ||= "$LOCAL_RULES_DIR/certs/server-key.pem"; $opt{'server-cert'} ||= "$LOCAL_RULES_DIR/certs/server-cert.pem"; @@ -898,9 +893,8 @@ sub compose_listen_info_string { $socket_info->{ip_addr}, $socket_info->{port})); } elsif ($socket->isa('IO::Socket::SSL')) { - push(@listeninfo, sprintf("SSL [%s]:%s, ssl version %s", - $socket_info->{ip_addr}, $socket_info->{port}, - $opt{'ssl-version'}||'sslv3')); + push(@listeninfo, sprintf("SSL [%s]:%s", $socket_info->{ip_addr}, + $socket_info->{port})); } } @@ -1071,7 +1065,6 @@ sub server_sock_setup_inet { $sockopt{V6Only} = 1 if $io_socket_module_name eq 'IO::Socket::IP' && IO::Socket::IP->VERSION >= 0.09; %sockopt = (%sockopt, ( - SSL_version => $sslversion, SSL_verify_mode => 0x00, SSL_key_file => $opt{'server-key'}, SSL_cert_file => $opt{'server-cert'}, @@ -1092,7 +1085,8 @@ sub server_sock_setup_inet { if (!$server_inet) { $diag = sprintf("could not create %s socket on [%s]:%s: %s", $ssl ? 'IO::Socket::SSL' : $io_socket_module_name, - $adr, $port, $!); + $adr, $port, $ssl && $IO::Socket::SSL::SSL_ERROR ? 
+ "$!,$IO::Socket::SSL::SSL_ERROR" : $!); push(@diag_fail, $diag); } else { $diag = sprintf("created %s socket on [%s]:%s", @@ -1171,6 +1165,11 @@ my $spamtest = Mail::SpamAssassin->new( } ); +#Enable Timing? +if ($opt{'timing'}) { + $spamtest->timer_enable(); +} + # if $clients_per_child == 1, there's no point in copying configs around unless ($clients_per_child > 1) { # unset $copy_config_p so we don't bother trying to copy things back @@ -1456,7 +1455,12 @@ sub spawn { } undef $current_user; - dbg("timing: " . $spamtest->timer_report()) if would_log('dbg', 'timing'); + #LOG TIMING + if ($opt{'timing'}) { + info("timing: " . $spamtest->timer_report()); + } else { + dbg("timing: " . $spamtest->timer_report()) if would_log('dbg', 'timing'); + } } # If the child lives to get here, it will die ... Muhaha. @@ -2417,6 +2421,7 @@ sub got_compress_header { return 0; } $hdrs->{compress_zlib} = 1; + dbg("spamd: compress header received\n"); } else { protocol_error("(compression type not supported)"); @@ -3021,33 +3026,27 @@ sub ip_is_allowed { sub preload_modules_with_tmp_homedir { - # set $ENV{HOME} in /tmp while we compile and preload everything. - # File::Spec->tmpdir uses TMPDIR, TMP, TEMP, C:/temp, /tmp etc. - my $tmpdir = File::Spec->tmpdir(); - if ( !$tmpdir ) { - die "spamd: cannot find writable tmp dir, set TMP or TMPDIR in environment"; - } + # set $ENV{HOME} in a temp directory while we compile and preload everything. + my $tmphome = secure_tmpdir(); - # If TMPDIR isn't set, File::Spec->tmpdir() will set it to undefined. + # If TMPDIR isn't set, File::Spec->tmpdir() called by secure_tmpdir() may set it to undefined. # that then breaks other things ... + # If this is really necessary shouldn't secure_tmpdir() be doing it? 
delete $ENV{'TMPDIR'} if ( !defined $ENV{'TMPDIR'} ); - my $tmphome = File::Spec->catdir( $tmpdir, "spamd-$$-init" ); - $tmphome = untaint_file_path($tmphome); - my $tmpsadir = File::Spec->catdir( $tmphome, ".spamassassin" ); dbg("spamd: Preloading modules with HOME=$tmphome"); - # bug 5379: spamd won't start if the temp preloading dir exists; - # be sure to remove it just in case + if (!-d $tmphome) { + die "spamd: cannot create temp directory $tmphome: $!"; + } + + # bug 5379: spamd won't start if the temp preloading dir exists; check if exists and remove it + # This check should be unnecessary now that $tmphome created using File::Temp, but leave it just in case if (-d $tmpsadir) { rmdir( $tmpsadir ) or die "spamd: $tmpsadir not empty: $!"; } - if (-d $tmphome) { - rmdir( $tmphome ) or die "spamd: $tmphome not empty: $!"; - } - mkdir( $tmphome, 0700 ) or die "spamd: cannot create $tmphome: $!"; mkdir( $tmpsadir, 0700 ) or die "spamd: cannot create $tmpsadir: $!"; $ENV{HOME} = $tmphome; @@ -3232,13 +3231,13 @@ Options: -H [dir], --helper-home-dir[=dir] Specify a different HOME directory --ssl Enable SSL on TCP connections --ssl-port port Override --port setting for SSL connections - --ssl-version sslversion Specify SSL protocol version to use --server-key keyfile Specify an SSL keyfile --server-cert certfile Specify an SSL certificate --socketpath=path Listen on a given UNIX domain socket --socketowner=name Set UNIX domain socket file's owner --socketgroup=name Set UNIX domain socket file's group --socketmode=mode Set UNIX domain socket file's mode + --timing Enable timing and logging -V, --version Print version and exit The --listen option (or -i) may be specified multiple times, its syntax @@ -3720,14 +3719,6 @@ Optionally specifies the port number for the server to listen on for SSL connections (default: whatever --port uses). See B<--ssl> for more details. -=item B<--ssl-version>=I - -Specify the SSL protocol version to use, one of B or B. 
-The default, B, is the most flexible, accepting a SSLv3 or -higher hello handshake, then negotiating use of SSLv3 or TLSv1 -protocol if the client can accept it. Specifying B<--ssl-version> -implies B<--ssl>. - =item B<--server-key> I Specify the SSL key file to use for SSL connections. @@ -3774,6 +3765,12 @@ Set UNIX domain socket to use the octal mode I. Note that if C<-u> is used, that user should have write permissions to unlink the file later, for when the C server is killed. + +=item B<--timing> + + Enable timing measurements and output the information for logging. This + is the same information as provided by the TIMING tag. + =back =head1 SEE ALSO diff --git sql/README.txrep sql/README.txrep index 38b975bc4..34521cb35 100644 --- sql/README.txrep +++ sql/README.txrep @@ -94,3 +94,8 @@ schedule is below: DELETE FROM txrep WHERE last_hit <= (now() - INTERVAL 120 day); +For PostgreSQL, use the following: + +DELETE FROM txrep WHERE last_hit <= (now() - INTERVAL '120 day'); + + diff --git sql/txrep_pg.sql sql/txrep_pg.sql index 52c100317..0f73e9d3b 100644 --- sql/txrep_pg.sql +++ sql/txrep_pg.sql @@ -5,10 +5,23 @@ CREATE TABLE txrep ( count int(11) NOT NULL default '0', totscore float NOT NULL default '0', signedby varchar(255) NOT NULL default '', - last_hit timestamp NOT NULL default CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, - PRIMARY KEY (username,email,signedby,ip), - KEY last_hit (last_hit) + last_hit timestamp NOT NULL default CURRENT_TIMESTAMP, + PRIMARY KEY (username,email,signedby,ip) ); +create index txrep_last_hit on txrep (last_hit); + +create OR REPLACE function update_txrep_last_hit() +RETURNS TRIGGER AS $$ +BEGIN + NEW.last_hit = CURRENT_TIMESTAMP; + RETURN NEW; +END; +$$ language 'plpgsql'; + +create TRIGGER update_txrep_update_last_hit BEFORE UPDATE +ON txrep FOR EACH ROW EXECUTE PROCEDURE +update_txrep_last_hit(); + ALTER TABLE txrep SET (fillfactor=95); diff --git t/data/nice/unicode1 t/data/nice/unicode1 new file mode 100644 index 
000000000..5487d9b06 --- /dev/null +++ t/data/nice/unicode1 @@ -0,0 +1,28 @@ +Return-Path: +Received: from mail-ig0-x248.esempio-università.it + (mail-ig0-x248.esempio-università.it [IPv6:2001:db8::c05:248]) + (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) + (No client certificate requested) + by Sörensen.example.com (Postfix) with UTF8SMTPS + for ; Thu, 8 Oct 2015 07:45:14 +0200 (CEST) +From: =?ISO-8859-1?Q?Maril=F9?= Gioffré ♥ +To: =?iso-8859-1*sv?Q?D=F6rte_=C5._S=F6rensen,_Jr.?= + +Cc: θσερ@εχαμπλε.ψομ +Subject: =?iso-8859-2*sl?Q?Doma=e8e?= + =?utf-8*sl?Q?_omre=C5?= =?Utf-8*SL?q?=BEje?= +X-Note: The above split of UTF-8 char =C5 =BE is invalid, but seen in the wild +Date: Mon, 05 Oct 2015 12:00:00 +0200 +Message-ID: +MIME-Version: 1.0 +Content-Transfer-Encoding: quoted-printable +Content-Type: application/octet-stream; name= + "=?utf-8?B?0LTQvtC60YPQvNC10L3RgtGLINC00LvRjyDQvtGC0LTQ?= + =?utf-8?B?tdC70LAg0LrQsNC00YDQvtCyLnBkZg==?=" +Content-Disposition: attachment; filename= + "=?utf-8?B?0LTQvtC60YPQvNC10L3RgtGLINC00LvRjyDQvtGC0LTQ?= + =?utf-8?B?tdC70LAg0LrQsNC00YDQvtCyLnBkZg==?=" +X-Note: The above split of multibyte char across encoded-words is also invalid + +abc +def diff --git t/data/nice/unicode2 t/data/nice/unicode2 new file mode 100644 index 000000000..1614bf1c2 --- /dev/null +++ t/data/nice/unicode2 @@ -0,0 +1,10 @@ +From: test +To: test +Message-ID: <123@test.example.com> +Date: Thu, 16 Jun 2016 00:41:19 (UTC) +Subject: =?UTF-8?B?44CQ6YeN6KaB6KiK5oGv44CR5Y+w6Zu7MTA15bm0?= + =?UTF-8?B?M+aciOmbu+iyu++8jOWnlOiol+mHkeiejeapn+ani+aJow==?= + =?UTF-8?B?57mz5oiQ5Yqf6Zu75a2Q57mz6LK75oaR6K2JKOmbu+iZnw==?= + =?UTF-8?B?MDc0ODc2MTY3MzAp?= + +test diff --git t/header_utf8.t t/header_utf8.t new file mode 100755 index 000000000..7f7479c2b --- /dev/null +++ t/header_utf8.t @@ -0,0 +1,206 @@ +#!/usr/bin/perl + +use lib '.'; use lib 't'; +use SATest; sa_t_init("header_utf8.t"); + +use constant TEST_ENABLED => ($] >= 5.008); + +our $have_libidn; 
+BEGIN { + eval { require Net::LibIDN } and do { $have_libidn = 1 }; +} + +use Test; BEGIN { plan tests => (TEST_ENABLED ? 156 : 0) }; + +exit unless (TEST_ENABLED); + +# --------------------------------------------------------------------------- + +%mypatterns = ( + q{/ LT_RPATH /} => 'LT_RPATH', + q{/ LT_ENVFROM /} => 'LT_ENVFROM', + q{/ LT_FROM /} => 'LT_FROM', + q{/ LT_FROM_ADDR /} => 'LT_FROM_ADDR', + q{/ LT_FROM_NAME /} => 'LT_FROM_NAME', + q{/ LT_FROM_RAW /} => 'LT_FROM_RAW', + q{/ LT_TO_ADDR /} => 'LT_TO_ADDR', + q{/ LT_TO_NAME /} => 'LT_TO_NAME', + q{/ LT_CC_ADDR /} => 'LT_CC_ADDR', + q{/ LT_SUBJ /} => 'LT_SUBJ', + q{/ LT_SUBJ_RAW /} => 'LT_SUBJ_RAW', + q{/ LT_MESSAGEID /} => 'LT_MESSAGEID', + q{/ LT_MSGID /} => 'LT_MSGID', + q{/ LT_CT /} => 'LT_CT', + q{/ LT_CT_RAW /} => 'LT_CT_RAW', + q{/ LT_AUTH_DOM /} => 'LT_AUTH_DOM', + q{/ LT_NOTE /} => 'LT_NOTE', + q{/ LT_UTF8SMTP_ANY /} => 'LT_UTF8SMTP_ANY', + q{/ LT_SPLIT_UTF8_SUBJ /} => 'LT_SPLIT_UTF8_SUBJ', + q{/ USER_IN_BLACKLIST /} => 'USER_IN_BLACKLIST', +); + +%mypatterns_utf8 = ( # as it appears in a report body + q{/(?m)^ 0\.0 LT_ANY_CHARS \s*En-tête contient caractères$/} => 'LT_ANY_CHARS utf8', +); + +%mypatterns_mime_qp = ( # as it appears in a mail header section + q{/(?m)^\t\* 0\.0 LT_ANY_CHARS =\?UTF-8\?Q\?En-t=C3=AAte_contient_caract=C3=A8res\?=$/} => 'LT_ANY_CHARS mime encoded', +); + +%mypatterns_mime_b64 = ( # as it appears in a mail header section + q{/(?m)^\t\* 0\.0 LT_ANY_CHARS =\?UTF-8\?B\?5a2X56ym6KKr5YyF5ZCr5Zyo5raI5oGv5oql5aS06YOo5YiG\?=$/} => 'LT_ANY_CHARS mime encoded', +); + +%mypatterns_mime_b64_bug7307 = ( + q{/ LT_SUBJ2 /} => 'LT_SUBJ2', + q{/ LT_SUBJ2_RAW /} => 'LT_SUBJ2_RAW', +); + +%anti_patterns = ( + q{/ NO_RELAYS /} => 'NO_RELAYS', +# q{/ INVALID_MSGID /} => 'INVALID_MSGID', +); + +my $myrules = <<'END'; + add_header all AuthorDomain _AUTHORDOMAIN_ + blacklist_from Marilù.Gioffré@esempio-università.it + header LT_UTF8SMTP_ANY Received =~ /\bwith\s*UTF8SMTPS?A?\b/mi + score 
LT_UTF8SMTP_ANY -0.1 + header LT_RPATH Return-Path:addr =~ /^Marilù\.Gioffré\@esempio-università\.it\z/ + score LT_RPATH 0.01 + header LT_ENVFROM EnvelopeFrom =~ /^Marilù\.Gioffré\@esempio-università\.it\z/ + score LT_ENVFROM 0.01 + header LT_FROM From =~ /^Marilù Gioffré ♥ $/m + score LT_FROM 0.01 + header LT_FROM_ADDR From:addr =~ /^Marilù\.Gioffré\@esempio-università\.it\z/ + score LT_FROM_ADDR 0.01 + header LT_FROM_NAME From:name =~ /^Marilù Gioffré ♥\z/ + score LT_FROM_NAME 0.01 + header LT_FROM_RAW From:raw =~ /^\s*=\?ISO-8859-1\?Q\?Maril=F9\?= Gioffré ♥ $/m + score LT_FROM_RAW 0.01 + header LT_AUTH_DOM X-AuthorDomain =~ /^xn--esempio-universit-4ob\.it\z/ + score LT_AUTH_DOM 0.01 + header LT_TO_ADDR To:addr =~ /^Dörte\@Sörensen\.example\.com\z/ + score LT_TO_ADDR 0.01 + header LT_TO_NAME To:name =~ /^Dörte Å\. Sörensen, Jr\./ + score LT_TO_NAME 0.01 + header LT_CC_ADDR Cc:addr =~ /^θσερ\@εχαμπλε\.ψομ\z/ + score LT_CC_ADDR 0.01 + header LT_SUBJ Subject =~ /^Domače omrežje$/m + score LT_SUBJ 0.01 + header LT_SUBJ_RAW Subject:raw =~ /^\s*=\?iso-8859-2\*sl\?Q\?Doma=e8e\?=\s+=\?utf-8\*sl\?Q\?_omre=C5\?=/m + score LT_SUBJ_RAW 0.01 + header LT_SUBJ2 Subject =~ /^【重要訊息】台電105年3月電費,委託金融機構扣繳成功電子繳費憑證\(電號07487616730\)$/m + score LT_SUBJ2 0.01 + header LT_SUBJ2_RAW Subject:raw =~ /^\s*=\?UTF-8\?B\?44CQ6YeN6KaB6KiK5oGv44CR5Y\+w6Zu7MTA15bm0\?=\s*=\?UTF-8\?B\?M\+aciOmbu\+iyu\+\+8jOWnlOiol\+mHkeiejeapn\+ani\+aJow==\?=\s*=\?UTF-8\?B\?57mz5oiQ5Yqf6Zu75a2Q57mz6LK75oaR6K2JKOmbu\+iZnw==\?=\s*=\?UTF-8\?B\?MDc0ODc2MTY3MzAp\?=$/m + score LT_SUBJ2_RAW 0.01 + header LT_MSGID Message-ID =~ /^$/m + score LT_MSGID 0.01 + header LT_MESSAGEID MESSAGEID =~ /^$/m + score LT_MESSAGEID 0.01 + header LT_CT Content-Type =~ /документы для отдела кадров\.pdf/ + score LT_CT 0.01 + header LT_CT_RAW Content-Type:raw =~ /=\?utf-8\?B\?tdC70LAg0LrQsNC00YDQvtCyLnBkZg==\?="/ + score LT_CT_RAW 0.01 + header LT_SPLIT_UTF8_SUBJ Subject:raw =~ m{(=\?UTF-8) (?: \* [^?=<>, \t]* )? (\?Q\?) 
[^ ?]* =[89A-F][0-9A-F] \?= \s* \1 (?: \* [^ ?=]* )? \2 =[89AB][0-9A-F]}xsmi + score LT_SPLIT_UTF8_SUBJ 0.01 + header LT_NOTE X-Note =~ /^The above.*char =C5 =BE is invalid, .*wild$/m + score LT_NOTE 0.01 + header LT_ANY_CHARS From =~ /./ + score LT_ANY_CHARS 0.001 + describe LT_ANY_CHARS Header contains characters + lang fr describe LT_ANY_CHARS En-tête contient caractères + # sorry, Google translate: + lang zh describe LT_ANY_CHARS 字符被包含在消息报头部分 +END + +if (!$have_libidn) { + # temporary fudge to prevent a test failing + # until the Net::LibIDN becomes a mandatory module + $myrules =~ s{^(\s*header LT_AUTH_DOM\s+X-AuthorDomain =~)\s*(/.*/)$} + {$1 /esempio-università\.it/}m +} + +$ENV{PERL_BADLANG} = 0; # suppresses Perl warning about failed locale setting +# see Mail::SpamAssassin::Conf::Parser::parse(), also Bug 6992 +$ENV{LANGUAGE} = $ENV{LANG} = 'fr_CH.UTF-8'; + +#--- normalize_charset 1 + +tstlocalrules ($myrules . ' + report_safe 0 + normalize_charset 1 +'); + +%patterns = (%mypatterns, %mypatterns_mime_qp); +sarun ("-L < data/nice/unicode1", \&patterns_run_cb); +ok_all_patterns(); + +tstlocalrules ($myrules . ' + report_safe 1 + normalize_charset 1 +'); +%patterns = (%mypatterns, %mypatterns_utf8); +sarun ("-L < data/nice/unicode1", \&patterns_run_cb); +ok_all_patterns(); + +tstlocalrules ($myrules . ' + report_safe 2 + normalize_charset 1 +'); +%patterns = (%mypatterns, %mypatterns_utf8); +sarun ("-L < data/nice/unicode1", \&patterns_run_cb); +ok_all_patterns(); + +#--- normalize_charset 0 + +tstlocalrules ($myrules . ' + report_safe 0 + normalize_charset 0 +'); +%patterns = (%mypatterns, %mypatterns_mime_qp); +sarun ("-L < data/nice/unicode1", \&patterns_run_cb); +ok_all_patterns(); + +tstlocalrules ($myrules . ' + report_safe 1 + normalize_charset 0 +'); +%patterns = (%mypatterns, %mypatterns_utf8); +sarun ("-L < data/nice/unicode1", \&patterns_run_cb); +ok_all_patterns(); + +tstlocalrules ($myrules . 
' + report_safe 2 + normalize_charset 0 +'); +%patterns = (%mypatterns, %mypatterns_utf8); +sarun ("-L < data/nice/unicode1", \&patterns_run_cb); +ok_all_patterns(); + +#--- base64 encoded-words + +$ENV{PERL_BADLANG} = 0; # suppresses Perl warning about failed locale setting +# see Mail::SpamAssassin::Conf::Parser::parse(), also Bug 6992 +$ENV{LANGUAGE} = $ENV{LANG} = 'zh_CN.UTF-8'; + +tstlocalrules ($myrules . ' + report_safe 0 + normalize_charset 1 +'); +%patterns = (%mypatterns, %mypatterns_mime_b64); +sarun ("-L < data/nice/unicode1", \&patterns_run_cb); +ok_all_patterns(); + +#--- base64 encoded-words - Bug 7307 + +$ENV{LANGUAGE} = $ENV{LANG} = 'en_US.UTF-8'; + +tstlocalrules ($myrules . ' + report_safe 0 + normalize_charset 1 +'); +%patterns = (%mypatterns_mime_b64_bug7307); +%anti_patterns = (); +sarun ("-L < data/nice/unicode2", \&patterns_run_cb); +ok_all_patterns(); diff --git t/make_install.t t/make_install.t index 79ebb3537..3ab2d1fc5 100644 --- t/make_install.t +++ t/make_install.t @@ -1,10 +1,14 @@ #!/usr/bin/perl -use lib '.'; use lib 't'; +use lib '.'; +use lib 't'; $ENV{'TEST_PERL_TAINT'} = 'no'; # inhibit for this test -use SATest; sa_t_init("make_install"); +use SATest; +sa_t_init("make_install"); -use Test; plan tests => 25; +use Config; +use Test; +plan tests => 25; BEGIN { if (-e 't/test_dir') { @@ -31,6 +35,10 @@ system_or_die "cd .. 
&& make tardist"; system_or_die "cd $builddir && gunzip -cd $cwd/../Mail-SpamAssassin-*.tar.gz | tar xf -"; system_or_die "cd $builddir && mv Mail-SpamAssassin-* x"; +# Figure out where 'bin' really is +my $binpath = $Config{sitebinexp}; +$binpath =~ s|^\Q$Config{siteprefixexp}\E/||; + #Fix for RH/Fedora using lib64 instead of lib - bug 6609 $x64_bit_lib_test = 0; if (-e '/bin/rpm') { @@ -85,66 +93,72 @@ sub run_makefile_pl { # ------------------------------------------------------------------- new_instdir(__LINE__); -run_makefile_pl "PREFIX=$instdir/foo"; +my $prefix="$instdir/foo"; +run_makefile_pl "PREFIX=$prefix"; -ok -d "$instdir/foo/bin"; +ok -d "$prefix/$binpath"; if ($x64_bit_lib_test) { - #print "testing for $instdir/foo/lib64"; - ok -d "$instdir/foo/lib64"; + #print "testing for $prefix/lib64"; + ok -d "$prefix/lib64"; } else { - ok -d "$instdir/foo/lib"; + ok -d "$prefix/lib"; } -ok -e "$instdir/foo/share/spamassassin"; -ok -e "$instdir/foo/etc/mail/spamassassin"; +ok -e "$prefix/share/spamassassin"; +ok -e "$prefix/etc/mail/spamassassin"; # ------------------------------------------------------------------- new_instdir(__LINE__); -run_makefile_pl "PREFIX=$instdir/foo LIB=$instdir/bar"; +$prefix="$instdir/foo"; +run_makefile_pl "PREFIX=$prefix LIB=$instdir/bar"; -ok -e "$instdir/foo/bin"; +ok -d "$prefix/$binpath"; ok -e "$instdir/bar/Mail/SpamAssassin"; -ok -e "$instdir/foo/share/spamassassin"; -ok -e "$instdir/foo/etc/mail/spamassassin"; +ok -e "$prefix/share/spamassassin"; +ok -e "$prefix/etc/mail/spamassassin"; # ------------------------------------------------------------------- new_instdir(__LINE__); -run_makefile_pl "PREFIX=$instdir/foo LIB=$instdir/bar DATADIR=$instdir/data"; +$prefix="$instdir/foo"; +run_makefile_pl "PREFIX=$prefix LIB=$instdir/bar DATADIR=$instdir/data"; -ok -e "$instdir/foo/bin"; +ok -d "$prefix/$binpath"; ok -e "$instdir/bar/Mail/SpamAssassin"; ok -e "$instdir/data/sa-update-pubkey.txt"; -ok !-e 
"$instdir/foo/share/spamassassin"; -ok -e "$instdir/foo/etc/mail/spamassassin"; +ok !-e "$prefix/share/spamassassin"; +ok -e "$prefix/etc/mail/spamassassin"; # ------------------------------------------------------------------- new_instdir(__LINE__); -run_makefile_pl "PREFIX=$instdir/foo SYSCONFDIR=$instdir/sysconf"; +$prefix="$instdir/foo"; +run_makefile_pl "PREFIX=$prefix SYSCONFDIR=$instdir/sysconf"; -ok -e "$instdir/foo/bin"; +ok -d "$prefix/$binpath"; ok -e "$instdir/sysconf/mail/spamassassin/local.cf"; -ok -e "$instdir/foo/share/spamassassin/sa-update-pubkey.txt"; -ok !-e "$instdir/foo/etc/mail/spamassassin"; +ok -e "$prefix/share/spamassassin/sa-update-pubkey.txt"; +ok !-e "$prefix/etc/mail/spamassassin"; # ------------------------------------------------------------------- new_instdir(__LINE__); -run_makefile_pl "PREFIX=$instdir/foo CONFDIR=$instdir/conf"; +$prefix="$instdir/foo"; +run_makefile_pl "PREFIX=$prefix CONFDIR=$instdir/conf"; -ok -e "$instdir/foo/bin"; +ok -d "$prefix/$binpath"; ok -e "$instdir/conf/local.cf"; -ok -e "$instdir/foo/share/spamassassin/sa-update-pubkey.txt"; -ok !-e "$instdir/foo/etc/mail/spamassassin"; +ok -e "$prefix/share/spamassassin/sa-update-pubkey.txt"; +ok !-e "$prefix/etc/mail/spamassassin"; # ------------------------------------------------------------------- new_instdir(__LINE__); +$prefix="$instdir/dest/foo"; run_makefile_pl "DESTDIR=$instdir/dest PREFIX=/foo"; -ok -d "$instdir/dest/foo/bin"; -ok -d "$instdir/dest/foo/etc/mail/spamassassin"; +ok -d "$prefix/$binpath"; +ok -d "$prefix/etc/mail/spamassassin"; if ($x64_bit_lib_test) { - ok -d "$instdir/dest/foo/lib64"; + ok -d "$prefix/lib64"; } else { - ok -d "$instdir/dest/foo/lib"; + ok -d "$prefix/lib"; } -ok -e "$instdir/dest/foo/share/spamassassin/sa-update-pubkey.txt"; +ok -e "$prefix/share/spamassassin/sa-update-pubkey.txt"; diff --git t/prefs_include.t t/prefs_include.t index 2c7d359e2..598f042ab 100755 --- t/prefs_include.t +++ t/prefs_include.t @@ -2,7 +2,7 @@ 
use lib '.'; use lib 't'; use SATest; sa_t_init("prefs_include"); -use Test; BEGIN { plan tests => 2 }; +use Test; BEGIN { plan tests => 3 }; $ENV{'LANGUAGE'} = $ENV{'LC_ALL'} = 'C'; # a cheat, but we need the patterns to work @@ -10,9 +10,9 @@ $ENV{'LANGUAGE'} = $ENV{'LC_ALL'} = 'C'; # a cheat, but we need the %patterns = ( - q{X-Spam-Report: =?ISO-8859-1?Q? }, 'qp-encoded-hdr', - q{ Invalid Date: header =ae =af =b0 foo }, 'qp-encoded-desc', - + q{/(?m)^X-Spam-Report:\s*$/}, 'qp-encoded-hdr', + q{/(?m)^\t\*\s+[0-9.-]+ INVALID_DATE\s+Invalid Date: header =\?UTF-8\?B\?wq4gwq8gwrA=\?=$/}, 'qp-encoded-desc', + q{/(?m)^ [0-9.-]+ INVALID_DATE\s+Invalid Date: header ® ¯ °$/}, 'report-desc', ); tstprefs (" @@ -23,7 +23,7 @@ tstprefs (" open (OUT, ">log/prefs_include.inc") or die "open log/prefs_include.inc failed"; print OUT " report_safe 0 - describe INVALID_DATE Invalid Date: header \xae \xaf \xb0 foo + describe INVALID_DATE Invalid Date: header ® ¯ ° "; close OUT; diff --git t/rcvd_parser.t t/rcvd_parser.t index ce62d23fe..673ed09e4 100755 --- t/rcvd_parser.t +++ t/rcvd_parser.t @@ -18,7 +18,7 @@ if (-e 'test_dir') { # running from test directory, not .. 
use lib '.'; use lib 't'; use SATest; sa_t_init("rcvd_parser"); -use Test; BEGIN { plan tests => 146 }; +use Test; BEGIN { plan tests => 147 }; use strict; # format is: @@ -479,6 +479,10 @@ my %data = ( 'from [94.79.161.130] by 3capp-webde-bs01.dlan.cinetic.de (via HTTP); Tue, 11 Nov 2014 20:32:34 +0100' => '[ ip=94.79.161.130 rdns= helo= by=3capp-webde-bs01.dlan.cinetic.de ident= envfrom= id= auth=HTTP msa=0 ]', + #Bug 7213 + 'from mail1-backend.DDDD.com (LHLO mail2-backend.DDDD.com) (10.2.2.20) by mail3-backend.DDDD.com with LMTP; Thu, 18 Jun 2015 16:50:56 -0700 (PDT)' => + '[ ip=10.2.2.20 rdns=mail1-backend.DDDD.com helo=mail2-backend.DDDD.com by=mail3-backend.DDDD.com ident= envfrom= id= auth= msa=0 ]', + ); my $sa = create_saobj(); diff --git t/reportheader_8bit.t t/reportheader_8bit.t index bd00ee669..0d22470aa 100755 --- t/reportheader_8bit.t +++ t/reportheader_8bit.t @@ -2,7 +2,7 @@ use lib '.'; use lib 't'; use SATest; sa_t_init("reportheader"); -use Test; BEGIN { plan tests => 2 }; +use Test; BEGIN { plan tests => 3 }; $ENV{'LANGUAGE'} = $ENV{'LC_ALL'} = 'C'; # a cheat, but we need the patterns to work @@ -10,15 +10,16 @@ $ENV{'LANGUAGE'} = $ENV{'LC_ALL'} = 'C'; # a cheat, but we need the %patterns = ( - q{X-Spam-Report: =?ISO-8859-1?Q? 
}, 'qp-encoded-hdr', - q{ Invalid Date: header =ae =af =b0 foo }, 'qp-encoded-desc', + q{/(?m)^X-Spam-Report:\s*$/}, 'qp-encoded-hdr', + q{/(?m)^\t\*\s+[0-9.-]+ INVALID_DATE\s+Invalid Date: header =\?UTF-8\?B\?wq4gwq8gwrA=\?= foo$/}, 'qp-encoded-desc', + q{/(?m)^ [0-9.-]+ INVALID_DATE\s+Invalid Date: header ® ¯ ° foo$/}, 'report-desc', ); tstprefs (" $default_cf_lines report_safe 0 - describe INVALID_DATE Invalid Date: header \xae \xaf \xb0 foo + describe INVALID_DATE Invalid Date: header ® ¯ ° foo "); sarun ("-L -t < data/spam/001", \&patterns_run_cb); diff --git t/sa_compile.t t/sa_compile.t index ac1e7ca62..82b4057b5 100644 --- t/sa_compile.t +++ t/sa_compile.t @@ -9,7 +9,7 @@ use File::Basename; use File::Path qw/mkpath/; my $temp_binpath = $Config{sitebinexp}; -$temp_binpath =~ s/^\Q$Config{prefix}\E//; +$temp_binpath =~ s|^\Q$Config{siteprefixexp}\E/||; # called from BEGIN sub re2c_version_new_enough { @@ -79,7 +79,7 @@ sub set_rules { my $rules = shift; #Create the dir for the cf file - my $file = "$instdir/foo/share/spamassassin/20_testrules.cf"; + my $file = "$instdir/share/spamassassin/20_testrules.cf"; my $dir = dirname($file); mkpath($dir); @@ -95,7 +95,7 @@ sub set_rules { close RULES or die; #Create the dir for the pre file - $file = "$instdir/foo/etc/mail/spamassassin/v330.pre"; + $file = "$instdir/etc/mail/spamassassin/v330.pre"; $dir = dirname($file); mkpath($dir); @@ -126,11 +126,11 @@ sub set_rules { # ------------------------------------------------------------------- new_instdir("basic"); -$INST_FROM_SCRATCH and run_makefile_pl "PREFIX=$instdir/foo"; +$INST_FROM_SCRATCH and run_makefile_pl "PREFIX=$instdir SYSCONFDIR=$instdir/etc DATADIR=$instdir/share/spamassassin LOCALSTATEDIR=$instdir/var/spamassassin CONFDIR=$instdir/etc/mail/spamassassin"; # we now have an "installed" version we can run sa-compile with. 
Ensure # sarun() will use it appropriately -$scr = "$instdir/foo/$temp_binpath/spamassassin"; +$scr = "$instdir/$temp_binpath/spamassassin"; $scr_localrules_args = $scr_cf_args = ""; # use the default rules dir, from our "install" set_rules q{ @@ -140,7 +140,7 @@ set_rules q{ }; # ensure we don't use compiled rules -system("rm -rf $instdir/foo/var/spamassassin/compiled"); +system("rm -rf $instdir/var/spamassassin/compiled"); %patterns = ( q{ check: tests=FOO }, 'FOO' @@ -152,14 +152,14 @@ clear_pattern_counters(); # ------------------------------------------------------------------- -system_or_die "$instdir/foo/$temp_binpath/sa-compile --keep-tmps"; # --debug +system_or_die "$instdir/$temp_binpath/sa-compile --keep-tmps"; # --debug %patterns = ( q{ able to use 1/1 'body_0' compiled rules }, 'able-to-use', q{ check: tests=FOO }, 'FOO' ); -$scr = "$instdir/foo/$temp_binpath/spamassassin"; +$scr = "$instdir/$temp_binpath/spamassassin"; $scr_localrules_args = $scr_cf_args = ""; # use the default rules dir, from our "install" ok sarun ("-D -Lt < $cwd/data/spam/001 2>&1", \&patterns_run_cb); ok_all_patterns(); diff --git t/spamd_ssl.t t/spamd_ssl.t index a3b343ac5..85f152313 100755 --- t/spamd_ssl.t +++ t/spamd_ssl.t @@ -2,10 +2,7 @@ use lib '.'; use lib 't'; use SATest; sa_t_init("spamd_ssl"); -use Test; plan tests => (($SKIP_SPAMD_TESTS || !$SSL_AVAILABLE) ? 0 : 9), - onfail => sub { - warn "\n\nNote: This may not be a SpamAssassin bug, as some platforms require that you" . - "\nspecify a protocol in spamc --ssl option, and possibly in spamd --ssl-version.\n\n" }; +use Test; plan tests => (($SKIP_SPAMD_TESTS || !$SSL_AVAILABLE) ? 
0 : 9); exit if ($SKIP_SPAMD_TESTS || !$SSL_AVAILABLE); diff --git t/spamd_ssl_accept_fail.t t/spamd_ssl_accept_fail.t index a650a6e80..a4d463722 100755 --- t/spamd_ssl_accept_fail.t +++ t/spamd_ssl_accept_fail.t @@ -23,9 +23,9 @@ q{ This must be the very last line}, 'lastline', ); -ok (start_spamd ("-L --ssl --ssl-version=sslv3 --server-key data/etc/testhost.key --server-cert data/etc/testhost.cert")); +ok (start_spamd ("-L --ssl --server-key data/etc/testhost.key --server-cert data/etc/testhost.cert")); ok (spamcrun ("< data/spam/001", \&patterns_run_cb)); -ok (spamcrun ("--ssl=sslv3 < data/spam/001", \&patterns_run_cb)); +ok (spamcrun ("--ssl < data/spam/001", \&patterns_run_cb)); ok (stop_spamd ()); ok_all_patterns(); diff --git t/spamd_ssl_tls.t t/spamd_ssl_tls.t deleted file mode 100755 index a9e0e617f..000000000 --- t/spamd_ssl_tls.t +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/perl - -use lib '.'; use lib 't'; -use SATest; sa_t_init("spamd_ssl_tls"); -use Test; plan tests => (($SKIP_SPAMD_TESTS || !$SSL_AVAILABLE) ? 
0 : 9); - -exit if ($SKIP_SPAMD_TESTS || !$SSL_AVAILABLE); - -# --------------------------------------------------------------------------- - -%patterns = ( - -q{ Return-Path: sb55sb55@yahoo.com}, 'firstline', -q{ Subject: There yours for FREE!}, 'subj', -q{ X-Spam-Status: Yes, score=}, 'status', -q{ X-Spam-Flag: YES}, 'flag', -q{ X-Spam-Level: **********}, 'stars', -q{ TEST_ENDSNUMS}, 'endsinnums', -q{ TEST_NOREALNAME}, 'noreal', -q{ This must be the very last line}, 'lastline', - - -); - -ok (sdrun ("-L --ssl --ssl-version=tlsv1 --server-key data/etc/testhost.key --server-cert data/etc/testhost.cert", - "--ssl=tlsv1 < data/spam/001", - \&patterns_run_cb)); -ok_all_patterns(); diff --git t/spamd_ssl_v3.t t/spamd_ssl_v3.t deleted file mode 100755 index bbff2deff..000000000 --- t/spamd_ssl_v3.t +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/perl - -use lib '.'; use lib 't'; -use SATest; sa_t_init("spamd_sslv3"); -use Test; plan tests => (($SKIP_SPAMD_TESTS || !$SSL_AVAILABLE) ? 0 : 9); - -exit if ($SKIP_SPAMD_TESTS || !$SSL_AVAILABLE); - -# --------------------------------------------------------------------------- - -%patterns = ( - -q{ Return-Path: sb55sb55@yahoo.com}, 'firstline', -q{ Subject: There yours for FREE!}, 'subj', -q{ X-Spam-Status: Yes, score=}, 'status', -q{ X-Spam-Flag: YES}, 'flag', -q{ X-Spam-Level: **********}, 'stars', -q{ TEST_ENDSNUMS}, 'endsinnums', -q{ TEST_NOREALNAME}, 'noreal', -q{ This must be the very last line}, 'lastline', - - -); - -ok (sdrun ("-L --ssl --ssl-version=sslv3 --server-key data/etc/testhost.key --server-cert data/etc/testhost.cert", - "--ssl=sslv3 < data/spam/001", - \&patterns_run_cb)); -ok_all_patterns(); diff --git t/uri_text.t t/uri_text.t index b5b872bef..5869c6752 100755 --- t/uri_text.t +++ t/uri_text.t @@ -304,7 +304,7 @@ example.ag ^http://example\.ag$ example.ai ^http://example\.ai$ example.al ^http://example\.al$ example.am ^http://example\.am$ -example.an ^http://example\.an$ +example.an !^http://example\.an$ 
example.ao ^http://example\.ao$ example.aq ^http://example\.aq$ example.ar ^http://example\.ar$ @@ -580,7 +580,7 @@ www.example.ag ^http://www\.example\.ag$ www.example.ai ^http://www\.example\.ai$ www.example.al ^http://www\.example\.al$ www.example.am ^http://www\.example\.am$ -www.example.an ^http://www\.example\.an$ +www.example.an !^http://www\.example\.an$ www.example.ao ^http://www\.example\.ao$ www.example.aq ^http://www\.example\.aq$ www.example.ar ^http://www\.example\.ar$ diff --git tools/sare-sa-stats.pl tools/sare-sa-stats.pl new file mode 100644 index 000000000..548226920 --- /dev/null +++ tools/sare-sa-stats.pl @@ -0,0 +1,333 @@ +#!/usr/bin/perl + +# @hourly /usr/local/bin/sa-stats.pl --web --n=25 > /var/www/html/spamstat/index.html + + +# ------------------------------------------------------------- +# file: sa-stats.pl (SARE release) +# created: 2005-01-31 +# updated: 2007-01-30 +# version: 1.03 +# author: Dallas +# desc: Generates Top Spam/Ham Rules fired for SA 3.1.x installations. +# +# IMPORTANT NOTES +# +# SA 3.0.x log files do not have user= in +# the report: log entries, so this does not work with 3.0. +# See http://www.rulesemporium.com/programs/sa-stats.txt for +# a SA 3.0.x version ( no per-domain / per-user support ) +# +# If your top 5 does not contain URIBL_BLACK, see +# http://www.uribl.com/usage.shtml +# ------------------------------------------------------------- + +# Per User and Per Domain Statistics... +# ------------------------------------------------------------- +# +# ./sa-stats -r postmaster +# - this would give all stats for postmaster users, +# regardless of which domain it was for. handy if you +# have alot of domain aliases +# +# ./sa-stats -r @domain +# - this would give all stats for the domain specified. +# make sure you include the '@' sign before the +# domain or the script will assume you wanted a user +# name instead. +# +# ./sa-stats -r user@domain.com +# - this would give all stats for a specific email address. 
+#     this assumes you pass 'spamc -u user@domain.com' vs.
+#     'spamc -u user'.  If you do the latter, you simply
+#     want to call -r user instead.
+#
+# -------------------------------------------------------------
+
+use strict;
+use warnings;
+
+use Getopt::Long;
+use Pod::Usage;
+
+my ($LOG_DIR,$FILE,$TOPRULES,$PRINT_TO_WEB,$HELP,$RECIP);
+
+GetOptions (
+  'logdir|l=s'   => \$LOG_DIR,
+  'filename|f=s' => \$FILE,
+  'recip|r=s'    => \$RECIP,
+  'num|n=i'      => \$TOPRULES,
+  'web|w'        => \$PRINT_TO_WEB,
+  'help|h'       => \$HELP
+);
+
+if ($HELP) {
+  print "usage: $0 [-l <dir>] [-f <file>] [-r <recip>] [-n <num>] [-w]\n";
+  print "\t--logdir|-l <dir>\tDirectory containing spamd logs\n";
+  print "\t--filename|-f <file>\tFile names or regex to look for in the logdir\n";
+  print "\t--recip|-r <recip>\tOnly count mail for this user, \@domain or user\@domain\n";
+  print "\t--num|-n <num>\tNumber of top rules to display\n";
+  print "\t--web|-w\tMake it web friendly output\n";
+  print "\t--help|-h\tPrints this help\n";
+  exit;
+}
+
+if (!defined $TOPRULES) { $TOPRULES=20 }
+if (!defined $LOG_DIR)  { $LOG_DIR="/var/log" }
+if (!defined $FILE)     { $FILE='^maillog$' }  # regex
+
+# LEAVE THE REST ALONE UNLESS YOU KNOW WHAT YOU ARE DOING...
+################################################################
+
+my $NUM_EMAIL=0;  my $NUM_SPAM=0;  my $NUM_HAM=0;
+my $EMAIL_HITS=0; my $SPAM_HITS=0; my $HAM_HITS=0;
+my %SPAM_RULES=(); my %HAM_RULES=();
+my $TOTAL_SPAM_RULES=0; my $TOTAL_HAM_RULES=0;
+my $ALSPAM=0; my $ALHAM=0; my $ALNO=0;
+my $HAM_SEC=0; my $SPAM_SEC=0; my $EMAIL_SEC=0;
+
+my $footer = '';
+
+# Collect every entry of the log directory whose name matches $FILE
+# (a case-insensitive regex), then fold each matching log into the totals.
+opendir (my $dirh, $LOG_DIR) or die "cannot open log directory $LOG_DIR: $!\n";
+my @logs = grep { /$FILE/i } readdir $dirh;
+closedir $dirh;
+
+foreach my $log (@logs) {
+  calcstats($LOG_DIR."/".$log);
+}
+
+summarize();
+exit;
+
+#############################
+
+# calcstats($path)
+#   Scans one spamd log file and adds every "result:" line it can parse to
+#   the file-level counters (message, score and scan-time totals, autolearn
+#   tallies and the per-rule hit counts in %SPAM_RULES / %HAM_RULES).
+#   Lines whose recipient does not match the -r filter are ignored.
+#   Missing, unreadable or directory paths are reported and skipped.
+sub calcstats {
+
+  my $log=shift;
+
+  if (!-e $log || -d $log) {
+    print "$log not found..\n";
+    return;
+  }
+
+  my $fh;
+  if (!open($fh, '<', $log)) {
+    print "$log not readable: $!\n";
+    return;
+  }
+
+  while (my $line = <$fh>) {
+
+    # for user=, it may be %domain or $GLOBAL or @GLOBAL or user@domain..
+    next unless $line =~ /.*result:\s+(\w|\.)\s+(\-?\d+)\s+\-\s+(.*)\s+scantime\=([\d\.]+)\,size\=(\d+).*user=([^\,]+).*autolearn=(\w+)/;
+    my ($result,$score,$rules,$time,$recip,$learn) = ($1,$2,$3,$4,$6,$7);
+
+    my ($user,$domain) = _split_recip($recip);
+    next unless _recip_wanted($user,$domain);
+
+    my $spam = ($result eq "Y") ? 1 : 0;
+
+    $EMAIL_SEC+=$time;
+    if ($spam) {
+      $SPAM_SEC+=$time;
+    }
+    else {
+      $HAM_SEC+=$time;
+    }
+
+    if ($learn =~ /ham/) {
+      $ALHAM++;
+    }
+    elsif ($learn =~ /spam/) {
+      $ALSPAM++;
+    }
+    else {
+      $ALNO++;
+    }
+
+    foreach my $r (split(/\,/,$rules)) {
+      if ($spam) {
+        $TOTAL_SPAM_RULES++;
+        $SPAM_RULES{$r}++;
+      }
+      else {
+        $TOTAL_HAM_RULES++;
+        $HAM_RULES{$r}++;
+      }
+    }
+
+    if ($spam) {
+      $NUM_SPAM++;
+      $SPAM_HITS += $score;
+    }
+    else {
+      $NUM_HAM++;
+      $HAM_HITS += $score;
+    }
+    $NUM_EMAIL++;
+    $EMAIL_HITS += $score;
+  }
+  close($fh);
+
+}
+
+# _split_recip($recip)
+#   Breaks the user= field of a log line into (user, domain).  The domain is
+#   returned with a leading '@'.  "%domain" / "@domain" yield an empty user;
+#   a bare name (no '@') is assigned the domain '@localhost'.
+sub _split_recip {
+  my $recip=shift;
+
+  if ($recip =~ m/^[\%\@](.+)/) {
+    return ('', '@'.$1);
+  }
+  elsif ($recip =~ m/(.+)\@(.+)/) {
+    return ($1, '@'.$2);
+  }
+  return ($recip, '@localhost');
+}
+
+# _recip_wanted($user,$domain)
+#   True when no -r filter was given, or when the filter matches this
+#   recipient: "user" compares the user part, "@domain" (or "%domain") the
+#   domain part, and "user@domain" the full address.
+sub _recip_wanted {
+  my ($user,$domain)=@_;
+
+  return 1 if (!$RECIP);
+  if ($RECIP =~ m/^[\%\@](.+)/) { return ($domain eq '@'.$1) }
+  if ($RECIP =~ m/(.+)\@(.+)/)  { return ($RECIP eq $user.$domain) }
+  if ($RECIP !~ m/\@/)          { return ($RECIP eq $user) }
+  return 1;
+}
+
+# _averages($hits,$secs,$msgs)
+#   Returns (average score, average scan time) over $msgs messages, each
+#   formatted to two decimals, or (0,0) when there were no messages.
+sub _averages {
+  my ($hits,$secs,$msgs)=@_;
+  return (0,0) unless ($msgs > 0);
+  return (sprintf("%.2f",$hits/$msgs), sprintf("%.2f",$secs/$msgs));
+}
+
+# _percent($part,$total)
+#   $part as a percentage of $total, formatted to two decimals; 0.00 when
+#   $total is zero so an all-spam or all-ham log cannot divide by zero.
+sub _percent {
+  my ($part,$total)=@_;
+  return sprintf("%.2f", $total > 0 ? ($part/$total)*100 : 0);
+}
+
+# _print_top_rules($title,$ranked)
+#   Prints one ranked rule table.  $ranked is the hash (rule => hits) that
+#   determines the order and the COUNT column; the three percentage columns
+#   relate the rule to all mail, to spam and to ham respectively.
+sub _print_top_rules {
+  my ($title,$ranked)=@_;
+
+  print $title;
+  print " FOR $RECIP" if ($RECIP);
+  print "\n";
+
+  hr();
+  printf("%4s\t%-24s\t%5s %8s %7s %7s\n","RANK","RULE NAME","COUNT","\%OFMAIL","\%OFSPAM","\%OFHAM");
+  hr();
+
+  my $count=0;
+  # Ties are broken by rule name so repeated runs list rules in the same order.
+  foreach my $key (sort { $ranked->{$b} <=> $ranked->{$a} || $a cmp $b } keys %$ranked) {
+    # A rule may have fired for only one class of mail, so default to 0.
+    my $spamhits=$SPAM_RULES{$key} || 0;
+    my $hamhits=$HAM_RULES{$key} || 0;
+    printf("%4d\t%-24s\t%5s\t%6.2f\t%6.2f\t%6.2f\n",$count+1,$key,$ranked->{$key},
+           _percent($spamhits+$hamhits,$NUM_EMAIL),
+           _percent($spamhits,$NUM_SPAM),
+           _percent($hamhits,$NUM_HAM));
+    $count++;
+    last if ($count >= $TOPRULES && $TOPRULES > 0);
+  }
+  hr();
+  br();
+}
+
+# summarize()
+#   Prints the report: overall / spam / ham averages, total scan time, and
+#   the top $TOPRULES spam and ham rules.  With --web the text is wrapped in
+#   a <pre> block behind a CGI-style Content-type header.
+sub summarize {
+
+  print "Content-type: text/html\n\n" if ($PRINT_TO_WEB);
+  print "<pre>" if ($PRINT_TO_WEB);
+
+  my ($avgspamhits,$avgspamtime)   = _averages($SPAM_HITS,$SPAM_SEC,$NUM_SPAM);
+  my ($avghamhits,$avghamtime)     = _averages($HAM_HITS,$HAM_SEC,$NUM_HAM);
+  my ($avgemailhits,$avgemailtime) = _averages($EMAIL_HITS,$EMAIL_SEC,$NUM_EMAIL);
+
+  print "\n\n";
+
+  if ($RECIP) {
+    print "SPAM STATS FOR $RECIP\n";
+    print "-" x 60 . "\n";
+  }
+
+  my $ALTOT=$ALSPAM+$ALHAM;
+  printf("Email: %8s  Autolearn: %5s  AvgScore: %6.2f  AvgScanTime: %5.2f sec\n",$NUM_EMAIL,$ALTOT,$avgemailhits,$avgemailtime);
+  printf("Spam:  %8s  Autolearn: %5s  AvgScore: %6.2f  AvgScanTime: %5.2f sec\n",$NUM_SPAM,$ALSPAM,$avgspamhits,$avgspamtime);
+  printf("Ham:   %8s  Autolearn: %5s  AvgScore: %6.2f  AvgScanTime: %5.2f sec\n",$NUM_HAM,$ALHAM,$avghamhits,$avghamtime);
+
+  br();
+  printf "Time Spent Running SA:      %7.2f hours\n",$EMAIL_SEC/60/60;
+  printf "Time Spent Processing Spam: %7.2f hours\n",$SPAM_SEC/60/60;
+  printf "Time Spent Processing Ham:  %7.2f hours\n",$HAM_SEC/60/60;
+
+  br();
+
+  _print_top_rules("TOP SPAM RULES FIRED", \%SPAM_RULES);
+  _print_top_rules("TOP HAM RULES FIRED", \%HAM_RULES);
+
+  print "</pre>\n" if ($PRINT_TO_WEB);
+  print $footer if ($PRINT_TO_WEB && $footer ne "");
+  print "\n";
+}
+
+#######################
+# hr(): horizontal rule -- an <hr> tag in web mode, otherwise 70 dashes.
+sub hr {
+  if ($PRINT_TO_WEB) {
+    print "<hr>";
+  }
+  else {
+    print "-" x 70 ."\n";
+  }
+}
+#######################
+# br(): line break -- a <br> tag in web mode, otherwise a newline.
+sub br {
+  if ($PRINT_TO_WEB) {
+    print "<br>";
+  }
+  else {
+    print "\n";
+  }
+}
+
+
+
+
+