lib/Exim/Grey.pm
changeset 74 adf33377005c
parent 73 e2559ee78cb3
equal deleted inserted replaced
73:e2559ee78cb3 74:adf33377005c
     1 package Exim::Grey;
       
     2 # for usage please see at the end
       
     3 
       
     4 use strict;
       
     5 use warnings;
       
     6 use base 'Exporter';
       
     7 use Carp;
       
     8 
       
     9 our @EXPORT_OK   = qw(unseen seen getDBDir connectDB getDefault);
       
    10 our %EXPORT_TAGS = (all => \@EXPORT_OK,);
       
    11 our $VERSION     = '2.0';
       
    12 
       
    13 our $verbose;
       
    14 
       
    15 sub verbose {
       
    16     return if not $verbose;
       
    17     print STDERR __PACKAGE__ . ': ' . map { s/\0//gr } @_, "\n";
       
    18 }
       
    19 
       
    20 sub exim_bool { $_[0] ? 'yes' : 'no' }
       
    21 
       
    22 # You may choose, but DB_File's footprint is smaller.
       
    23 # perl -MDB_File -e 'tie %h, ...':	real    0m0.063s
       
    24 # perl -MBerkeleyDB -e 'tie %h, ...':	real	0m0.112s
       
    25 # And DB_File is part of the Perl core distribution (?)
       
    26 # use BerkeleyDB;
       
    27 # use DB_File;
       
    28 #
       
    29 # But we need locking! DB_File::Lock isn't part of the corelist.
       
    30 use DB_File::Lock;
       
    31 
       
    32 my %DEFAULT = (
       
    33     delay => 600,
       
    34     db    => 'seen',
       
    35 );
       
    36 
       
    37 # some helper functions
       
    38 sub getDBDir();
       
    39 sub findExim(;$);
       
    40 sub connectDB($$);
       
    41 sub getDefault() { %DEFAULT }
       
    42 
       
    43 # dbm file is relativ to $spool_directory/grey, EXCEPT its name
       
    44 # starts with "/".
       
    45 
       
    46 sub unseen_ {
       
    47     my $item   = shift;
       
    48     my $delay  = shift // $DEFAULT{delay};
       
    49     my $db     = shift // $DEFAULT{db};
       
    50     my $now    = time();
       
    51     my ($auto) = $item =~ /.*?\/(.+?)$/ # remember the /<autokey> from the item
       
    52         and $item =~ s/\/.*?$//;        # and remove it from the item
       
    53     my $rc;
       
    54 
       
    55     connectDB(\my %h, $db);
       
    56 
       
    57     return 1    # not unseen, ergo known
       
    58       if defined $auto and is_whitelisted($auto, \%h);
       
    59 
       
    60     my $key = "$item\0";                # for compatibility with Exim's dbm functions
       
    61 
       
    62     # We do not know anything about the client -> unknown.
       
    63     # But remember that key with the associated "auto" subkey
       
    64     if (not exists $h{$key}) {
       
    65         $h{$key} = serialize(t0 => $now, t1 => $now, count => 0, auto => [defined $auto ? $auto : ()]);
       
    66 
       
    67         verbose "unseen: $item" if $verbose;
       
    68         return 1;
       
    69     }
       
    70 
       
    71     my %entry = deserialize($h{$key});
       
    72 
       
    73     # we know the client, but last contact was recently (too fast)
       
    74     # should we add it to our list auto entries too?
       
    75     if ($now - $entry{t0} < $delay) {
       
    76         return 1;
       
    77     }
       
    78 
       
    79     # we know the client, was patiently enough
       
    80     whitelist(\%h, uniq($auto, $entry{auto})) if defined $auto;
       
    81     $entry{count}++;
       
    82     $h{$key} = $_ = serialize(%entry);
       
    83     verbose "seen: $_" if $verbose;
       
    84     return 0;
       
    85 }
       
    86 
       
    87 sub unseen { exim_bool unseen_ @_ }
       
    88 sub seen { exim_bool !unseen_ @_ }
       
    89 
       
    90 # According to a thought from "David Woodhouse <dwmw2@infradead.org>"
       
    91 # on exim-users@exim.org (Thu, 08 May 2008 13:10:21 +0100,
       
    92 # Message-Id: <1210248621.25560.1088.camel@pmac.infradead.org>) we
       
    93 # should have the ability to "auto whitelist" hosts which are known
       
    94 # for retries, because there is no benefit in greylisting them.
       
    95 #
       
    96 # Most safe approach would be something based on message id.
       
    97 # If we see the message id a second time it indicates successful retry.
       
    98 # But we do not see the message id the first time we reject the message.
       
    99 
       
   100 # This function has to be called twice per message delivery attempt
       
   101 # <KEY> <$sender_host_address> <$sender_helo_name>
       
   102 # (Where <KEY> is something like <$sender_address>+<$local_part@$domain>
       
   103 # If we see the same message a second time (same message means here:
       
   104 # same greylist criteria
       
   105 
       
   106 sub whitelist {
       
   107     my ($h, @items) = (shift, uniq(@_));
       
   108     my $now = time;
       
   109     warn __PACKAGE__ . ": whitelist: @items\n"
       
   110         if $verbose;
       
   111     $h->{"$_\0"} = "$now $now 1 auto\0"
       
   112         foreach uniq(@items);
       
   113 }
       
   114 
       
   115 sub uniq {
       
   116     my %h = map { $_, undef } @_;
       
   117     return keys %h;
       
   118 }
       
   119 
       
   120 sub is_whitelisted {
       
   121     my ($item, $h) = @_;
       
   122     my $key = "$item\0";
       
   123 
       
   124     warn __PACKAGE__ . 'is '
       
   125         . (exists $h->{$key} ? '' : 'not')
       
   126         . "whitelisted: $item\n" if $verbose;
       
   127 
       
   128     return 0 if not exists $h->{$key};
       
   129 
       
   130     my ($t0, undef, $cnt, $flag) = split /[ \0]/, $h->{$key};
       
   131     $h->{$key} = join(' ' => $t0, time, ++$cnt, $flag ? $flag : ()) . "\0";
       
   132 
       
   133 
       
   134     return 1;
       
   135 }
       
   136 
       
   137 # Get the directory where we could store the database file(s)
       
   138 # If we're running under exim it's easy, otherwise we've to find exim
       
   139 # and then ask...
       
   140 sub getDBDir() {
       
   141     my ($spooldir, $dbdir);
       
   142     eval { $spooldir = Exim::expand_string('$spool_directory') };
       
   143     if (not defined $spooldir) {
       
   144         my $exim = findExim();
       
   145         chomp($spooldir = `$exim -be '\$spool_directory'`);
       
   146         die "Can't find spooldir" if not defined $spooldir;
       
   147     }
       
   148     -d ($dbdir = "$spooldir/grey") and return $dbdir;
       
   149 
       
   150     my ($mode, $owner, $group) = (stat $spooldir)[2, 4, 5];
       
   151     {
       
   152         local $) = $group;
       
   153         local $> = $owner;
       
   154         $mode &= 0777;
       
   155         mkdir $dbdir, $mode or die "Can't create $dbdir: $!";
       
   156     }
       
   157     return $dbdir;
       
   158 }
       
   159 
       
   160 sub findExim(;$) {
       
   161     my $path = shift || $ENV{PATH};
       
   162     my $exim;
       
   163     foreach (split /:/, $ENV{PATH}) {
       
   164         -x ($exim = "$_/exim")  and return $exim;
       
   165         -x ($exim = "$_/exim4") and return $exim;
       
   166     }
       
   167     die "Can't find exim binary (missing .../sbin dirs in PATH?";
       
   168 }
       
   169 
       
   170 sub connectDB($$) {
       
   171     my ($h, $db) = @_;
       
   172     $db = getDBDir() . "/$db" unless $db =~ m(^/);
       
   173 
       
   174     # Creation of DB-File if it doesn't exist
       
   175     # to avoid races we change our own uid/gid for creation of
       
   176     # this file.
       
   177     if (!-f $db) {
       
   178         (my $dir = $db) =~ s/^(.*)\/.*?$/$1/;
       
   179 
       
   180         # copy mode, uid, gid from the directory
       
   181         my ($mode, $user, $group) = (stat $dir)[2, 4, 5]
       
   182           or die "Can't stat $dir: $!";
       
   183         my $umask = umask(($mode & 0777) ^ 0777);
       
   184         local $) = $group;
       
   185         local $> = $user;
       
   186         open(X, ">>$db") or die "Can't create $db: $!";
       
   187         close(X);
       
   188         umask $umask;
       
   189     }
       
   190 
       
   191     # now test which of the DB-Modules has been loaded
       
   192 
       
   193     if (exists &BerkeleyDB::Hash::TIEHASH) {
       
   194         no strict;
       
   195         my $umask = umask 077;
       
   196         tie %$h, "BerkeleyDB::Hash", -Filename => $db
       
   197           or die "$0: $db: $!";
       
   198         return $db;
       
   199     }
       
   200 
       
   201     if (exists &DB_File::Lock::TIEHASH) {
       
   202         tie %$h, 'DB_File::Lock', [$db], 'write'
       
   203           or die "$0: $db: $!";
       
   204         return $db;
       
   205     }
       
   206 
       
   207     if (exists &DB_File::TIEHASH) {
       
   208         tie %$h, 'DB_File', $db or die "$0: $db: $!";
       
   209         warn "$0: using DB_File, no locking is possible!\n";
       
   210         return $db;
       
   211     }
       
   212 
       
   213     die "Can't connect to database driver";
       
   214 }
       
   215 
       
   216 # These two functions do not truly serialize/de-serialize the data
       
   217 # passed. They're specialiased to a fixed data format:
       
   218 # serialized: <t0> <t1> <count> [auto=<item>[,<item>]...]
       
   219 # structured: (
       
   220 #   t0 => <t0>,
       
   221 #   t1 => <t1>,
       
   222 #   count => <count>,
       
   223 #   auto => [item, item, …],
       
   224 #   )
       
   225 sub serialize {
       
   226     my %data = @_;
       
   227     my $auto = (ref $data{auto} && @{$data{auto}}) ?  join ',', @{$data{auto}} : '';
       
   228     return "$data{t0} $data{t1} $data{count} auto=$auto\0";
       
   229 }
       
   230 
       
   231 sub deserialize {
       
   232     my @data = split / /, $_[0] =~ s/\0$//r;
       
   233     my %data;
       
   234     ($data{t0}, $data{t1}, $data{count}) = splice @data, 0, 3;
       
   235     if ($data[0] =~ /^auto=(.*)/) {
       
   236         $data{auto} = [split /,/, $1];
       
   237     }
       
   238     return %data;
       
   239 }
       
   240 
       
   241 1;
       
   242 
       
   243 __END__
       
   244 =head1 NAME
       
   245 
       
   246  Exim::Grey
       
   247 
       
   248 =head1 SYNOPSIS
       
   249 
       
   250  perl_startup use Exim::Grey qw(unseen);
       
   251  ...
       
   252  acl rcpt
       
   253     defer condition = ${perl{unseen}{<$sender_address>:<$local_part@$domain>}}
       
   254 
       
   255 =head1 DESCRIPTION
       
   256 
       
   257 This is a module to be loade by Exim, the MTA. On request it exports
       
   258 a single function C<unseen()>. This function may be used in the ACL section
       
   259 to support greylisting.
       
   260 
       
   261 =head1 FUNCTIONS
       
   262 
       
   263 =over
       
   264 
       
   265 =item scalar B<unseen>(I<key>, I<delay>, I<db>)
       
   266 
       
   267 This function returns I<true> if the key is already known in the I<db> database
       
   268 for the minimum I<delay> time. (Note: The database may be cleaned regularly by
       
   269 the compangion L<exigrey> tool.)
       
   270 
       
   271 The I<key> is mandotory, the default I<delay> is 600 seconds and the default I<db>
       
   272 is called F<seen>.
       
   273 
       
   274 I<Key> may contain a suffix, separated by "/". This suffix is used for
       
   275 automatic whitelisting.
       
   276 
       
   277 =item scalar B<seen>(I<key>, I<delay>, I<db>)
       
   278 
       
   279 The same as C<unseen()>, but with reversed result.
       
   280 
       
   281 =back
       
   282 
       
   283 =head1 EXAMPLES
       
   284 
       
   285 =head2 Greylisting
       
   286 
       
   287 First you have to include B<Exim::Grey> into your Exim. If Exim is built with Perl
       
   288 support, the configuration syntax allows for C<perl_startup>:
       
   289 
       
   290     perl_startup = use Exim::Grey qw(unseen);
       
   291 
       
   292 In the ACL section of the configuration can check if a given key (sender, or combination
       
   293 of sender and recipient, or whatever) is new (unseen):
       
   294 
       
   295     defer condition = ${perl{unseen}{<$sender_address>:$<local_part@$domain>}}
       
   296 
       
   297 If the same condition is checked more then I<delay> later, the C<unseen> function returns
       
   298 false.
       
   299 
       
   300 =head2 Greylisting + automatic whitelisting
       
   301 
       
   302 Greylisting gets annoying if you do it for senders that are already known to retry. Thus it might be
       
   303 good to maintain a whitelist. You may use a suffix to your key, separated by "/". Once the greylist
       
   304 filter is passed, the used suffixes are registered with the whitelist.
       
   305 
       
   306   t
       
   307   |
       
   308   0  a->b/x  # a->b never seen, suffix never seen: greylist
       
   309   1  a->b/y  # a->b again: accept AND put x and y to the whitelist,
       
   310   |          # as they are known to retry
       
   311   2  c->b/x  # c->b unknown, but x is already whitelisted: accept
       
   312   3  d->b/y  # d->b unknown, but y is already whitelisted: accept
       
   313   |
       
   314   v
       
   315 
       
   316 This can be implemented in your ACL as:
       
   317 
       
   318     defer condition = ${perl{unseen}{<$sender_address>:$<local_part@$domain>/$sender_host_address}}
       
   319 
       
   320 But, if I<a> and I<b> are the sender and the recipient address, and the
       
   321 subkey is the sender host address, a spammer might send a forged message
       
   322 after t0, to get whitelisted.
       
   323 
       
   324 =head1 INTERNALS
       
   325 
       
   326 =head2 Format of the database
       
   327 
       
   328 The record structure is
       
   329 
       
   330  key:   item\0
       
   331  value: timestamp(creation) timestamp(usage) counter[ flags]\0
       
   332 
       
   333 This way we are compatible with ${lookup{...}dbm{...}}
       
   334 
       
   335 =head1 FILES
       
   336 
       
   337 The database files are placed in C<$spool_directory/grey/>.
       
   338 
       
   339 =head1 SEE ALSO
       
   340 
       
   341 The companion tool L<exigrey> should be used for inspection and manipulation
       
   342 of the database.
       
   343 
       
   344 =cut
       
   345 
       
   346 # vim:aw et sw=4 ts=4: