#!/usr/bin/perl -w
# Very loosely based on http://svn.apache.org/repos/asf/spamassassin/branches/3.2/tools/check_whitelist 
# rewrirren by DGermansa@Real-World-Systems.com
#
my $u1 = " sa-heatu Spam Assassin - Heuristic Email Address Tracker Utility  3.02 101002  \n".
          "                    DGermansa\@Real-world-Systems.com (c)2010 Dennis G German  ";

my $u2 = " usage: sa-heatu --quiet --showUpdates --verbose  \n".
         "                 --firstTimes --DONTupdateTimestamps  --noTimestamps   \n".
         "                 --expireOlderThan days \n".
         "                 --remove nnnnnn\@dddddd.xxx                          dbfile timestamps \n";

#        "                 --onlySpammers  \n".      ( dont save entries with -xx scores, even friends dont spam!
# 3.01 don't show day o week or seconds since it it sa-heatu runtime, reduce size of timestamp by keeping it in minutes 
# simple: copy autowhite-list entries to output, 
#         while updating a timestamp file by taking count from auto-whitelist and incuding a timestamp to new entries 
#         meanwhile skipping old entries (effectively deleting them),
#         removing a specific entry and
#         displaying the total score, average, count, email, IP, firstSeen
# all the other code is deciding what to display        
# autowhite-list is opened RDWR to minimize attempts by spamd to update while we are running.( not the best )
#after  sa-heatu completes:
#   mv auto-whitelist  auto-whitelist-1
#   mv auto-whitelisto auto-whitelist
#   mv timestamps  timestamps-1
#   mv timestampso timestamps
#
# showUpdates show itimestamps expired, added
sub usage { print $u1,$u2; exit;}
use strict;
use Fcntl;
use Getopt::Long;      #http://perldoc.perl.org/Getopt/Long.html
use English;          # get descriptive names for built-in variables
# 3.02 correct  -noTimestamps spelling was noTimesamps  second t missing AND disptime ="" if noTimestamps
# 3.01 in -> out to clean up deletes. prune makes no sense, removed. Display timestamps
# 3.00 add timestamps file

$OUTPUT_AUTOFLUSH = 1;
$OUTPUT_RECORD_SEPARATOR ="\n";  #    print (not printf !)
use vars 
    qw( $opt_min $opt_help $opt_showUpdates $opt_verbose $opt_remove $opt_quiet $opt_prune
        $opt_NoTimes $opt_NoUTimes  $opt_XOT $opt_firstTimes); 

my $false=0; my $true=1; 
my ($db, $ts, $dbo, $tso, %h,  %t, %ho,  %to, @k, $k, @tk, $tk, $tkey, $tcount, $twas);
my ($key,  $email,  $ip,  $totscore,  $count);
my ($width,  $fmt, $disptime,  $reason,  $prt,  $prtu, $skipf,  $tstamp, $lastUpdate, $oldest);
my $numc=0; my $remc=0;  my $tnewc=0; my $updatedc=0; my $expiredc=0; my $wouldbe=""; my $malformedc=0;
my $entrieso=0;
GetOptions(
  'help'            => \$opt_help,
  'showUpdates'     => \$opt_showUpdates,
  'verbose'         => \$opt_verbose,
  'noTimestamps'     => \$opt_NoTimes,
  'DONTupdatetimestamps' => \$opt_NoUTimes,
  'expireOlderThan=i' => \$opt_XOT,    # can't --remove and --expire 
  'firstTimes'      => \$opt_firstTimes,
  'remove=s'        => \$opt_remove,   # can't --remove and --expire 
  'prune=i'         => \$opt_prune,  # deprecated, just ignore
  'quiet'           => \$opt_quiet
         ) or usage();
$opt_help and usage();
$opt_verbose ||= 0;
$opt_NoTimes||= 0;
$opt_NoUTimes||= 0;
$opt_XOT||= 183;
$opt_firstTimes||= 0;
$opt_remove  ||= "";
$opt_showUpdates ||= 0;

BEGIN { @AnyDBM_File::ISA = qw(DB_File GDBM_File NDBM_File SDBM_File); }
use AnyDBM_File ;
if(!$opt_quiet) { print $u1; 
                  if($opt_firstTimes){print " First timeStamp run, i.e. no input from timestamps  ";}
                  if($opt_NoUTimes)  {print " No updating of times will be done  ";}
                  if($opt_NoTimes)   {print " No timestamp processing will be performed  ";}
               }
if (defined $ENV{COLUMNS}) {$width= ($ENV{COLUMNS}-66).""; } else { $width=20;}
if ($#ARGV == -1) { $db = $ENV{HOME}."/.spamassassin/auto-whitelist"; } else {    $db = $ARGV[0]; } $dbo = $db."o"; 
if ($#ARGV !=  1) { $ts = $ENV{HOME}."/.spamassassin/timestamps";     } else {    $ts = $ARGV[1]; } $tso = $ts."o";


tie %h, "AnyDBM_File",$db,  O_RDWR,  0600 or die "Cannot open   \"$db\" $!\n";   ## use O_RDWR to keep spamd away
if(!$opt_quiet){ print " Reading $db ";} 

tie %ho,"AnyDBM_File",$dbo, O_CREAT, 0600 or die "Cannot create \"$dbo\" $!\n";    ## create will use old if exists!
if(!$opt_quiet){ print " Writing $dbo  ";} 

$opt_remove = lc($opt_remove);    # If he specified mixed case change uppercase -> lower 
if($opt_remove ne "" && !$opt_quiet) { print "Attemptimng removal of:\"$opt_remove\" ";} 

if ( !$opt_NoTimes ){
if( !$opt_firstTimes){ 
 tie %t,  "AnyDBM_File",$ts,  O_RDONLY,  0600 or die "Cannot open:\"$ts\"    $!    \n"; 
 if(!$opt_quiet){ print " Reading $ts ";} 
}
tie  %to, "AnyDBM_File",$tso, O_CREAT,   0600 or die "Cannot create:\"$tso\" $!    \n"; 
if(!$opt_quiet){ print " Writing $tso  ";} 


if(!$opt_firstTimes) {@tk = grep(!/00-lastupdate$/,keys(%t));
                      $lastUpdate = $t{"00-lastupdate"};
         if ( defined $lastUpdate ){ if(!$opt_quiet){print " Timestamps last updated:". localtime($lastUpdate)}; }
           else {die " !! lastupdate key \"00-lastupdate\" is missing!! Did you mean to use --firstTimes ? \n\n";}

    $opt_XOT = time - $opt_XOT*24*60*60;  # days to minutes, to seconds  prior to now
    if ( $opt_XOT  > 0){ $fmt=localtime($opt_XOT); print " Expiring entries not seen since $fmt";}
                       } # not first time
                     }  # !NoTimes    i.e.  timestamps active

@k = grep(!/totscore$/,keys(%h));   # make an array of keys excluding the totscore keys
# generate the formatting specs, vary the width of the email shown based on how wide the display is
$fmt=    "\n %7.1f %9.1f %3d   %-" . $width . "s %s; %s";  

if (!$opt_quiet && $opt_verbose ){ print " average   total   count   ";}
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
for $key (@k) {
     $numc ++;
     $count    = $h{$key};
     $totscore = $h{"$key|totscore"}; if( !defined $totscore){ $malformedc++; next;}
   
     ($email = $key ) =~ s/\|.*//;      # clear |ip=nnn.nnn from key
     ($ip    = $key ) =~ s/.*\|ip=//;   # clear email@dom.tld|ip=  from key eaving  nnn.nnn
     $skipf=$false; 
     $prtu=$false; # candidate for print updates?
     $reason="";

     if ($email eq $opt_remove ) {      $remc++;                       $reason="remvd,"; $prtu=$true; $skipf=$true; }

     if(!$opt_NoTimes ){                                            # example 8/14/2010 14:21 IS 1281810061
       $tcount=$count;    # use count from awl for new or unchanged entries
       if($opt_firstTimes){             $tnewc++; $twas=0; $tstamp=time; $reason="";                }
        else{
         if(!defined $t{$key} )       { $tnewc++; $twas=0; $tstamp=time; $reason="new,  "; $prtu=$true;}
# existing:
          else{ ($twas   = $t{$key} ) =~   s/,.*//;  #  clear ,and count leaving time
                ($tcount = $t{$key} ) =~ s/.*,//;    #  clear beginning of time entry leaving tcount
                 if( $tcount < $count){ $updatedc++;        $tstamp=time; $reason="updtd,"; $prtu=$true; $tcount=$count;} 
                 else{                                      $tstamp=$twas;$reason="kept, ";} 

              if ($email eq $opt_remove ) {                               $reason="remvd,"; } #  this looks lame but...

              if($twas lt $opt_XOT )       { $expiredc++;                 $reason="exprd,"; $prtu=$true; $skipf=$true;} 
                     # lt string compare  (in case twas is text??)
              } # new | update | kept timestamp
            }  # ! first times
                    }  # ! No Times 
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
# This looks lame but is easy to understand ( and get right!!) --quiet only affects summary since,
      $prt=$false;                                #                 default shows summary, not details
      if($opt_showUpdates && $prtu){$prt=$true;}  # showUpdates
      if($opt_verbose)             {$prt=$true;}  # verbose (overrides showUpdates)
      if($prt){
            printf $fmt, $totscore/$count, $totscore,$count, $email, $ip, $reason;
            if($opt_NoTimes ){ $disptime="";} 
             else{ $disptime=localtime $twas; # don't include d-o-w, and drop seconds as that implies precision
                   $disptime =~ s/... //;   #day o week out
                   $disptime =~ s/:.. / /;   #seconds out
                   printf "%s", $disptime  ;
                    }
              } # if prt

      if(!$skipf){

        $entrieso++;
        $ho{$key}            = $h{$key} ;
        $ho{"$key|totscore"} = $h{"$key|totscore"} ;  # how ugly is this key!
        if(!$opt_NoTimes){
          if ($opt_NoUTimes) {$to{$key}         = $t{key};}   #keep old values
           else              {$to{$key}         = $tstamp.",".$tcount;} 
                        } # NoTimes
                  } # !skip

               }  # end for key  next 


$to{"00-lastupdate"}=time;
untie %h; untie %ho;
untie %t; untie %to; 

if ($opt_NoUTimes){$wouldbe=" would be";}

if( !$opt_quiet ){ 
if($width < 30){print " \n       export COLUMNS=nnn and you will get a nicer display!";} 
                print  "\n average    total count     \n";  #  footing since heading was so long ago

                printf "%8d entries removed.                    \n", $remc;
                printf "%8d entries". $wouldbe." expired.       \n", $expiredc; 
                printf "%8d timestamps". $wouldbe." added.      \n", $tnewc; 
                printf "%8d timestamps". $wouldbe." updated.    \n", $updatedc; 
if($malformedc){printf "%8d missing \"totscore\"                \n", $malformedc; }
                print  "                          ";
                printf "%8d entries input.                      \n", $numc; 
                printf "%8d entries output = input - expired - removed.   \n\n", $entrieso; }
 exit 0 ;

# I would appreciate notification if you distribute or modify this program 
# of if you have ideas on how to improve it.
#    DGermansa@Real-World-Systems.com


#   License:
#       This program is free software; you can redistribute it and/or modify
#       it under the terms of the GNU General Public License as published by
#       the Free Software Foundation; either version 2 of the License, or
#       (at your option) any later version.
#
#       This program is distributed in the hope that it will be useful,
#       but WITHOUT ANY WARRANTY; without even the implied warranty of
#       MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
#       See the GNU General Public License for more details.
#
#       You should have received a copy of the GNU General Public License
#       along with this program. If not, see <http://www.gnu.org/licenses/>.  
#  Copyright (C) 2010 Dennis G German <DGermansa@Real-World-Systems.com>
# tar -vcf sa-heatu.3.02.tar sa-heatu sa-heatu.readme sa-heatu.html 64c.hexdump style.css COPYING
