#!/usr/bin/perl
# spam_gatekeeper.pl - Limit the number of Spam messages accepted.
#    Steve Pellegrin (spellegrin at convoglio dot com)
#
# History:
#    1.0   2005-January-4    Original code
#    1.1   2005-January-14   Added syslog and verbose mode
#    2.0   2005-January-14   Redesigned to handle multiple messages
#                            per session (per Wietse's sample code)
#    3.0   2005-January-20   Added skip count
#    4.0   2005-January-30   Added rate limiter
#
# Usage:
#    This script functions as a Postfix policy service
#    (see the Postfix documentation for details).
#    It will accept the first n Spams received each x days
#    this allow you to slowly add fresh Spam to a corpus.

use strict;
use Fcntl qw(:DEFAULT :flock);
use Sys::Syslog qw(:DEFAULT setlogsock);


###########################################################
# Configuration parameters
#    Accept the first Spam received each day
#       my($spamsPerInterval) = 1;
#       my($daysPerInterval) = 1;
#       my($skipsPerInterval) = 0;
#
#    Accept the first three Spams received each week
#    after skipping the first ten received that week
#       my($spamsPerInterval) = 3;
#       my($daysPerInterval) = 7;
#       my($skipsPerInterval) = 10;
#
#    Amount of time (seconds) that must pass between Spams
#       my($spamRateInterval) = 60;
#
#    Maintain separate status for each email address
#       my ($perUserStatus) = 1
#
#    Combine all email addresses into one status
#       my ($perUserStatus) = 0;
#
#    Verbose mode - send additional information to syslog
#       my($verbose) = 1;
#
###########################

my ($daysPerInterval)  = 1;
my ($skipsPerInterval) = 20;
my ($spamsPerInterval) = 50;
my ($spamRateInterval) = 60;

my ($perUserStatus)    = 0;
my ($verbose)          = 1;

my ($statusDir)        = "/var/mta/spam_gatekeeper/";
my ($statusSuffix)     = ".status";

# Syslogging options are for verbose mode and errors.
# NOTE: Comment out the $syslog_socktype line if syslogging
#       does not work on your system.
my ($syslogSocktype)   = 'unix'; # inet, unix, stream, console
my ($syslogFacility)   = "mail";
my ($syslogOptions)    = "pid";
my ($syslogPriority)   = "info";
my ($syslogIdent)      = "postfix/spam_gatekeeper";


###########################################################
#
# Mainline
#

# Prepare for syslog.
setlogsock ($syslogSocktype);
openlog ($syslogIdent, $syslogOptions, $syslogFacility);

# Unbuffer standard output.
select((select(STDOUT), $| = 1)[0]);

# Used if $verbose is true
my ($msgCount) = 1;

# For each set of attributes on STDIN...
my ($currentTime) = time;
while () {
    # Fetch the next batch of attributes.
    # We are done when the batch comes back empty.
    my (%attributes) = fetchAttributes();
    unless (scalar(%attributes)) { last; }

    if ($verbose) {
        syslog(info=> "Processing message #$msgCount");
        syslog(info=> "Sender=%s, Recipient=%s", $attributes{sender}, $attributes{recipient});
        $msgCount += 1;
    }

    # Open and lock the status file.
    openStatusFile($attributes{recipient});

    # Decide how to handle this message.
    if (shouldAcceptSpam($currentTime)) {
        replyAccept();
    } else {
        replyReject();
    }
    updateStatusFile($currentTime);

    # Close the file (also unlocks it).
    close STATUS;
}
exit 0;
# Done!
###########################################################


###########################################################
#
# Fetch incoming attributes
#
sub fetchAttributes {
    my (%attr);
    my ($line);
    while (defined($line = <STDIN>)) {
        # An empty line marks the end of the attributes.
        if ($line eq "\n") { last; }

        # Store attribute or report garbage.
        if ($line =~ /([^=]+)=(.*)\n/) {
            $attr{substr($1, 0, 255)} = substr($2, 0, 255);
        } else {
           chomp($line);
            syslog(warning=>sprintf("Ignoring garbage: %.100s", $line));
        }
    }

    # Check request type if we got any attributes
    if (scalar(%attr)) {
        fatalError("Unrecognized request type: '$attr{request}'")
            unless $attr{request} eq "smtpd_access_policy";
    }

    return %attr;
}
###########################################################


###########################################################
#
# Open the file for r/w access and lock it.
# (We lock the file because of threading issues.)
#
sub openStatusFile {
    my ($recipient) = @_;
    my ($fileName) = getFilename($recipient);

    # sysopen cannot create directories.
    mkdir $statusDir;
    fatalError("Cannot open status file:  $fileName")
        unless sysopen(STATUS, $fileName, O_RDWR | O_CREAT);

    flock(STATUS, LOCK_EX);
}
###########################################################


###########################################################
#
# Decide whether this Spam should be accepted or not.
#
sub shouldAcceptSpam {
    my ($currentTime) = @_;
    my ($currentDate) = makeDate($currentTime);

    # Fetch the current stats.
    my ($spamDate, $spamCount, $timeStamp) = getStatus();
    syslog(info=> "Current date: %s, Spam date: %s, Spam count: %s", $currentDate, $spamDate, $spamCount)
        if $verbose;

    # Decide whether we are allowed to take this one.
    return ((($timeStamp + $spamRateInterval) < $currentTime) &&
            ($currentDate >= $spamDate) &&
            ($spamCount <= $spamsPerInterval) &&
            ($spamCount > 0));
}
###########################################################


###########################################################
#
# Update the status file.
#
sub updateStatusFile {
    my ($currentTime) = @_;
    my ($currentDate) = makeDate($currentTime);

    # Fetch the current stats and increment the counter.
    my ($spamDate, $spamCount, $timeStamp) = getStatus();
    
    # If the Spam date is in the future, leave things as they are.
    unless ($spamDate > $currentDate) {
        # Should we ignore this Spam because it arrived too soon?
        if (($timeStamp + $spamRateInterval) < $currentTime) {
            if ($spamCount > 0) {
                # If we are skipping or accepting Spam, decrement the spam count.
                $spamCount -= 1;
            } else {
                # Advance to the next date.
                my ($secondsPerDay) = (24 * 60 * 60);
                $spamDate = makeDate(time + ($daysPerInterval * $secondsPerDay));
                $spamCount = $spamsPerInterval + $skipsPerInterval;
            }
        
            syslog(info=> "New Spam date: %s, New Spam count: %s", $spamDate, $spamCount)
                if $verbose;

            # Update the values.
            putStatus($spamDate, $spamCount, $currentTime);
        }
        else {
            syslog(info=> "Spam ignored due to rate limit.")
                if $verbose;
        }
    }
}
###########################################################


###########################################################
#
# Tell our caller to accept this Spam.
#
sub replyAccept {
    print STDOUT "action=OK\n\n";
    syslog(info=> "Accepted Spam")
        if $verbose;
}
###########################################################


###########################################################
#
# Tell our caller to reject this Spam.
#
sub replyReject {
    print STDOUT "action=REJECT\n\n";
    syslog(info=> "Rejected Spam")
        if $verbose;
}
###########################################################


###########################################################
#
# Decide what file name we should use.
#
sub getFilename {
    my ($recipient) = @_;

    my ($filename);
    if ($perUserStatus) {
        if (length($recipient) == 0) {
            $filename = "unknown";
        } else {
            $filename = $recipient;
        }
    } else {
        $filename = "combined";
    }

    return "$statusDir$filename$statusSuffix";
}
###########################################################


###########################################################
#
# Read status from the file.
#
sub getStatus {
    # Seek to the start of the file and read our data.
    # If the file is empty, provide default values.
    seek(STATUS, 0, 0);
    
    my ($date, $count, $timeStamp);
    chomp($date = <STATUS>);
    chomp($count = <STATUS>);
    chomp($timeStamp = <STATUS>);
    if (!defined($date)) {
        $date = makeDate(0);
        $count = $spamsPerInterval + $skipsPerInterval;
        $timeStamp = 0;
        syslog(info=> "Using fake status info")
            if $verbose;
    }

    return ($date, $count, $timeStamp);
}
###########################################################


###########################################################
#
# Write status to the file.
#
sub putStatus {
    my ($date, $count, $timeStamp) = @_;

    # Seek to the start of the file and write our data.
    seek(STATUS, 0, 0);
    truncate(STATUS, 0);

    print STATUS "$date\n";
    print STATUS "$count\n";
    print STATUS "$timeStamp\n";
}
###########################################################


###########################################################
#
# Make a date string given a time.
#
sub makeDate {
    my ($theTime) = @_;
    my ($sec, $min, $hour, $day, $mon, $year, $wday, $yday, $isdst) = localtime $theTime;
    $year += 1900;
    if ($yday < 10) {
        $yday = "00$yday";
    } elsif ($yday < 100) {
        $yday = "0$yday";
    }
    return "$year$yday";
}
###########################################################


###########################################################
#
# Report an error at all syslog levels and die.
#
sub fatalError {
    my ($msg) = @_;

    syslog(err=> $msg);
    syslog(warning=> $msg);
    syslog(info=> $msg);

    exit 1;
}
###########################################################

