view maltfilter @ 79:9095db0fad8f

v0.18.0: Bunch of bugfixes; logfile trailing/scanning speed improved; memory usage improvements.
author Matti Hamalainen <ccr@tnsp.org>
date Sat, 29 Aug 2009 05:24:31 +0300
parents 4769aad8bd14
children 4e3f87470426
line wrap: on
line source

#!/usr/bin/perl -w
#############################################################################
#
# Malicious Attack Livid Termination Filter daemon (maltfilter)
# Programmed by Matti 'ccr' Hämäläinen <ccr@tnsp.org>
# (C) Copyright 2009 Tecnic Software productions (TNSP)
#
#############################################################################
use strict;
use Date::Parse;
use Net::IP;
use Net::DNS;
use LWP::UserAgent;
use IO::Seekable;

my $progversion = "0.18.0";
my $progbanner =
"Malicious Attack Livid Termination Filter daemon (maltfilter) v$progversion\n".
"Programmed by Matti 'ccr' Hamalainen <ccr\@tnsp.org>\n".
"(C) Copyright 2009 Tecnic Software productions (TNSP)\n";


#############################################################################
### Default settings and configuration
#############################################################################
my %settings = (
  "VERBOSITY"           => 3,
  "DRY_RUN"             => 1,
  "LOGFILE"             => "",
  "STATS_MAX_AGE"       => 336,  # in hours

  "PASSWD"              => "/etc/passwd",
  "SYSACCT_MIN_UID"     => 1,
  "SYSACCT_MAX_UID"     => 999,

  "FILTER"              => 0,
  "FILTER_THRESHOLD"    => 3,
  "FILTER_MAX_AGE"      => 168,  # in hours
  "FILTER_TARGET"       => "DROP",
  "IPTABLES"            => "/sbin/iptables",

  "FULL_TIME"           => 1,
  "STATUS_FILE_PLAIN"   => "",
  "STATUS_FILE_HTML"    => "",
  "STATUS_FILE_CSS"     => "",
  "WHOIS_URL"           => "http://whois.domaintools.com/",

  "CHK_SSHD"            => 1,
  "CHK_KNOWN_CGI"       => 1,
  "CHK_PHP_XSS"         => 1,
  "CHK_PROXY_SCAN"      => 1,
  "CHK_ROOT_SSH_PWD"    => 0,
  "CHK_SYSACCT_SSH_PWD" => 0,
  "CHK_GOOD_HOSTS"      => "",

  "EVIDENCE"            => 0,
  "EVIDENCE_DIR"        => "",

  "DRONEBL"             => 0,
  "DRONEBL_THRESHOLD"   => 5,
  "DRONEBL_MAX_AGE"     => 30, # in minutes
  "DRONEBL_RPC_URI"     => "http://dronebl.org/RPC2",
  "DRONEBL_RPC_KEY"     => "",
);

# List loopback and private netblocks by default here
my @noaction_ips_def = (
  "127.0.0.0/8",
  "10.0.0.0/8",
  "172.16.0.0/12",
  "192.168.0.0/16"
);

my %systemacct = ();
sub check_add_hit($$$$$$);


#############################################################################
### Check given logfile line for matches
#############################################################################
sub check_log_line($)
{
  # (1) SSHD scans
  if (/^(\S+\s+\d+\s+\d\d:\d\d:\d\d)\s+\S+\s+sshd\S*?: (.*)/) {
    my $mdate = $1;
    my $merr = $2;
    
    # (1.1) Generic login scan attempts
    if ($merr =~ /^Failed password for invalid user (\S+) from (\d+\.\d+\.\d+\.\d+)/) {
      check_add_hit($2, $mdate, "SSH login scan", "", 13, $settings{"CHK_SSHD"});
    }
    # (1.2) Root account SSH login password bruteforcing attempts.
    elsif ($merr =~ /^Failed password for root from (\d+\.\d+\.\d+\.\d+)/) {
      check_add_hit($1, $mdate, "Root SSH password bruteforce", "", 13, $settings{"CHK_ROOT_SSH_PWD"});
    }
    # (1.3) System account SSH login password bruteforcing attempts.
    elsif ($merr =~ /^Failed password for (\S+) from (\d+\.\d+\.\d+\.\d+)/) {
      my $mip = $2; my $macct = $1;
      if (defined($systemacct{$macct})) {
        check_add_hit($mip, $mdate, "SSH system account bruteforce", $macct, 13, $settings{"CHK_SYSACCT_SSH_PWD"});
      }
    }
  }

  # (2) Common/known vulnerable CGI/PHP software scans (like phpMyAdmin)
  elsif (/^\[(.+?)\]\s+\[error\]\s+\[client\s+(\d+\.\d+\.\d+\.\d+)\]\s+(.+)$/) {
    my $mdate = $1;
    my $mip = $2;
    my $merr = $3;
    if ($merr =~ /^File does not exist: (.+)$/) {
      my $tmp = $1;
      if ($tmp =~ /\/mss2|\/pma|admin|sql|\/roundcube|\/webmail|\/bin|\/mail|xampp|zen|mailto:|appserv|cube|round|_vti_bin|wiki/i) {
        check_add_hit($mip, $mdate, "CGI vuln scan", $tmp, 2, $settings{"CHK_KNOWN_CGI"});
      }
    }
  }

  # (3) Apache common logging format checks
  elsif (/(\d+\.\d+\.\d+\.\d+)\s+-\s+-\s+\[(.+?)\]\s+\"GET (\S*?) HTTP\//) {
    my $mdate = $2;
    my $mip = $1;
    my $merr = $3;
    
    # (3.1) Simple match for generic PHP XSS vulnerability scans
    if ($merr =~ /\.php\?\S*?=http:\/\/([^\/]+)/) {
      if (!check_hosts($settings{"CHK_GOOD_HOSTS"}, $1)) {
        if ($merr =~ /\.php\?\S*?=(http:\/\/[^\&\?]+\??)/) {
          evidence_queue($mip, $1, $merr);
        }
        check_add_hit($mip, $mdate, "PHP XSS", $merr, 2, $settings{"CHK_PHP_XSS"});
      }
    }
    # (3.2) Try to match proxy scanning attempts
    elsif ($merr =~ /^http:\/\/([^\/]+)/) {
      if (!check_hosts($settings{"CHK_GOOD_HOSTS"}, $1)) {
        check_add_hit($mip, $mdate, "Proxy scan", $merr, 2, $settings{"CHK_PROXY_SCAN"});
      }
    }
  }
}


#############################################################################
### Global variables
#############################################################################
my $reportmode = 0;      # Full report mode
my @scanfiles = ();      # Files to scan
my @scanfiles_once = (); # Files to scan only once during startup or HUP (e.g. not continuously followed)
my @noaction_ips = ();   # IPs not to filter
my %filehandles = ();    # Global hash holding opened scanned log filehandles
my $pid_file = "";       # Name of Maltfilter daemon pid file
my @configfiles = ();    # Array of configuration file names
my $LOGFILE;             # Maltfilter logfile handle
my %dronebl = ();

# IPs currently blocked in Netfilter $filterlist{$ip} = date
my %filterlist = ();      

# Gathered information about hosts
# $statlist{$ip}->
# "date1"    = timestamp of first hit
# "date2"    = timestamp of latest hit
# "hits"     = number of hits to this IP
# "dronebl"  = 0 == n/a, 1 == queued for submission, 2 == submitted
# $statlist{$ip}{"reason"}{$class}->
#   "msg"    = reason message (array if $reportmode)
#   "hits"   = hits to this class
#   "date1"  = timestamp of first hit
#   "date2"  = timestamp of latest hit
my %statlist = ();

# Gathered information about ignored hits (e.g. hits for tests that are not enabled)
# Same fields as in %statlist
my %ignorelist = ();


#############################################################################
### Status output functionality
#############################################################################
sub urlencode($)
{
  my $value = $_[0];
  $value =~ s/([^a-zA-Z_0-9 ])/"%" . uc(sprintf "%lx" , unpack("C", $1))/eg;
  $value =~ tr/ /+/;
  return $value;
}

my %entities = (
  "<" => "lt",
  ">" => "gt",
  "&" => "amp",
);

sub htmlentities($)
{
  my $value = $_[0];
#  $value =~ s/([keys %entities])/"&".$entities{$1}.";"/eg;
  foreach my $val (keys %entities) {
    $value =~ s/$val/\&$entities{$val}\;/g;
  }
  return $value;
}

sub get_time_str($)
{
  if ($_[0] >= 0) {
    return scalar localtime($_[0]);
  } else {
    return "?";
  }
}

my @paskat = (30*24*60*60, 7*24*60*60, 24*60*60, 60*60, 60);
my @opaskat = ("months", "weeks", "days", "hours", "minutes");
my @upaskat = ("month", "week", "day", "hour", "minute");

sub get_ago_str($)
{
  return get_time_str($_[0]) if ($settings{"FULL_TIME"});
  if ($_[0] >= 0) {
    my $str = "";
    my $cur = time() - $_[0];
    my ($r, $k, $p, $n);
    $n = 0;
    foreach my $div (@paskat) {
      $r = int($cur / $div);
      $k = ($cur % $div);
      if ($r > 0) {
        $p = ($r > 1) ? $opaskat[$n] : $upaskat[$n];
        $str .= ", " if ($str ne "");
        $str .= sprintf("%d %s", $r, $p);
      }
      $cur = $k;
      $n++;
    }
    return $str." ago";
  } else {
    return "?";
  }
}

sub printH($$$$)
{
  my $fh = $_[1];
  if ($_[0]) {
    print $fh "<h".$_[2].">".$_[3]."</h".$_[2].">\n";
  } else {
    my $c = ($_[2] <= 1) ? "=" : "-";
    print $fh $_[3]."\n". $c x length($_[3]) ."\n";
  }
}

sub printTD
{
  my $fh = $_[1];
  if ($_[0]) {
    my $s = defined($_[3]) ? " ".$_[3]." " : "";
    print $fh "<td".$s.">".$_[2]."</td>";
  } else {
    print $fh $_[2];
  }
}

sub printP($$$)
{
  my $fh = $_[1];
  if ($_[0]) {
    print $fh "<p>\n".$_[2]."</p>\n";
  } else {
    print $fh $_[2]."\n";
  }
}

sub printElem
{
  my $fh = $_[1];
  if ($_[0]) {
    print $fh $_[2];
  } elsif (defined($_[3])) {
    print $fh $_[3];
  }
}

sub bb($)
{
  return $_[0] ? "<b>" : "";
}

sub eb($)
{
  return $_[0] ? "</b>" : "";
}

sub pe($$)
{
  return $_[0] ? "<$_[1]>" : "";
}

sub get_link($$)
{
  if ($settings{"WHOIS_URL"} ne "") {
    return $_[0] ? "<a href=\"".$settings{"WHOIS_URL"}.$_[1].
      "\">".htmlentities($_[1])."</a>" : $_[1];
  } else {
    return $_[0];
  }
}

sub print_table1($$$$$$)
{
  my ($m, $f, $table, $keys, $func, $class) = @_;
  my $ntotal = 0;

  printElem($m, $f,
  "<table class=\"".$class."\">\n".
  "<tr><th>Hits</th><th>IP-address</th><th>First hit</th><th>Latest hit</th><th>Reason(s)</th></tr>\n",

  "Hits       | IP-address      | First hit                | Latest hit               | Reason(s)\n"
  );
  
  foreach my $mip (sort { $func->($table, $a, $b) } keys %{$keys}) {
    my $blocked = defined($filterlist{$mip}) ? "filtered" : "unfiltered";
    printElem($m, $f, " <tr class=\"$blocked\">");
    printTD($m, $f, sprintf(bb($m)."%-10d".eb($m), $table->{$mip}{"hits"}));
    printElem(!$m, $f, " | ");
    printTD($m, $f, sprintf("%-15s", get_link($m, $mip)));
    printElem(!$m, $f, " | ");
    printTD($m, $f, get_ago_str($table->{$mip}{"date1"}));
    printElem(!$m, $f, " | ");
    printTD($m, $f, get_ago_str($table->{$mip}{"date2"}));
    printElem(!$m, $f, " | ");
    my @reasons = ();
    foreach my $class (sort keys %{$table->{$mip}{"reason"}}) {
      my $msgs;
      if ($class ne "IPTABLES") {
      if ($reportmode) {
        my @tmp = reverse(@{$table->{$mip}{"reason"}{$class}{"msg"}});
        if ($#tmp > 5) { $#tmp = 5; }
        foreach (@tmp) { $_ = htmlentities($_); }
        $msgs = join(" ".bb($m)."|".eb($m)." ", @tmp);
      } else {
        $msgs = $table->{$mip}{"reason"}{$class}{"msg"};
      }
      push(@reasons, bb($m).$class.eb($m)." #".$table->{$mip}{"reason"}{$class}{"hits"}.
        " ( ".$msgs." )");
      }
    }
    printTD($m, $f, join(", ", @reasons));
    printElem($m, $f, "</tr>\n", "\n");
    $ntotal++;
  }
  printElem($m, $f, "</table>\n");
  printP($m, $f, bb($m).$ntotal.eb($m)." entries total.\n");
}

sub cmp_ips($$$)
{
  my @ipa = split(/\./, $_[1]);
  my @ipb = split(/\./, $_[2]);
  for (my $i = 0; $i < 4; $i++) {
    return -1 if ($ipa[$i] > $ipb[$i]);
    return  1 if ($ipa[$i] < $ipb[$i]);
  }
  return 0;
}

sub test_ips($$)
{
  my @ipa = split(/\./, $_[0]);
  my @ipb = split(/\./, $_[1]);
  for (my $i = 0; $i < 3; $i++) {
    return $i if ($ipa[$i] != $ipb[$i]);
  }
  return 4;
}

my @ipcolors = (
  "#666",
  "#777",
);

sub print_table2($$$$$$)
{
  my ($m, $f, $table, $keys, $func, $class) = @_;
  my $nhits = 0;
  my $str = "<th>IP-address</th><th>Hits</th><th>DroneBL?</th><th>First hit</th><th>Latest hit</th><th>Class</th>";
  my $str2 = "IP-address      | Hits      | DroneBL | First hit                | Latest hit               | Class                         ";

  printElem($m, $f,
  "<table class=\"".$class."\">\n<tr>". $str."<th> </th>".$str ."</tr>\n",
  $str2." || ".$str2."\n");

  my @previp = ("0.0.0.0", "0.0.0.0");
  my @ncolor = (0, 0);
  
  my $printEntry = sub {
    my $blocked = "class=\"".(defined($filterlist{$_[0]}) ? "filtered" : "unfiltered")."\"";
    if (test_ips($previp[$_[1]], $_[0]) < 3) {
      $ncolor[$_[1]]++;
    }
    $previp[$_[1]] = $_[0];
    my $str = "style=\"background: ".$ipcolors[$ncolor[$_[1]] % scalar @ipcolors].";\"";

    printTD($m, $f, sprintf("%-15s", get_link($m, $_[0])), $str);
    printElem(!$m, $f, " | ");
    printTD($m, $f, sprintf("%-8d ", $table->{$_[0]}{"hits"}), $blocked);
    printElem(!$m, $f, " | ");
    printTD($m, $f, sprintf("%-6s ", $table->{$_[0]}{"dronebl"}), $blocked);
    printElem(!$m, $f, " | ");
    printTD($m, $f, get_ago_str($table->{$_[0]}{"date1"}), $blocked);
    printElem(!$m, $f, " | ");
    printTD($m, $f, get_ago_str($table->{$_[0]}{"date2"}), $blocked);
    printElem(!$m, $f, " | ");
    my $tmp = join(", ", sort keys %{$table->{$_[0]}{"reason"}});
    printTD($m, $f, sprintf("%-30s", $tmp), $blocked);
    $nhits += $table->{$_[0]}{"hits"};
  };

  my @mkeys = sort { $func->($table, $a, $b) } keys %{$keys};
  my $nkeys = scalar @mkeys;
  my $kmax = $nkeys / 2;
  
  for (my $i = 0; $i <= $kmax; $i++) {
    printElem($m, $f, " <tr>");
    if ($i < $kmax) {
      $printEntry->($mkeys[$i], 0);
      printElem($m, $f, "<th> </th>", " || ");
    }
    if ($i + $kmax + 1 < $nkeys) { $printEntry->($mkeys[$i + $kmax + 1], 1); }
    printElem($m, $f, "</tr>\n", "\n");
  }

  printElem($m, $f, "</table>\n");
  printP($m, $f, bb($m).$nkeys.eb($m)." entries total, ".bb($m).$nhits.eb($m)." hits total.\n");
}

sub cmp_hits($$$)
{
  my $s1 = $_[0]->{$_[1]};
  my $s2 = $_[0]->{$_[2]};

  return -1 if ($s2->{"date2"} < $s1->{"date2"});
  return  1 if ($s2->{"date2"} > $s1->{"date2"});
  return $s2->{"hits"} <=> $s1->{"hits"};
}

sub get_period($)
{
  my ($str, $r, $k);
  if ($_[0] > 30 * 24) {
    $r = $_[0] / (30 * 24);
    $k = $_[0] % (30 * 24);
    $str = sprintf("%d months", $r);
    $str .= sprintf(", %d days", $k) if ($k > 0);
  } elsif ($_[0] > 24 * 7) {
    $str = sprintf("%1.1f weeks", $_[0] / (24.0 * 7.0));
  } elsif ($_[0] > 24) {
    $r = $_[0] / 24;
    $k = $_[0] % 24;
    $str = sprintf("%d days", $r);
    $str .= sprintf(", %d hours", $k) if ($k > 0);
  } else {
    $str = sprintf("%d hours", $_[0]);
  }
  return $str;
}

sub generate_status($$)
{
  my $filename = shift;
  my $m = shift;
  
  return unless ($filename ne "");

  open(STATUS, ">", $filename) or mdie("Could not open '".$filename."'!\n");
  my $f = \*STATUS;
  
  printElem($m, $f, "
<html>
<head>
 <title>Maltfilter status report</title>
");

  printElem($m, $f, "<link href=\"".$settings{"STATUS_FILE_CSS"}."\" rel=\"stylesheet\" type=\"text/css\" />")
  if ($settings{"STATUS_FILE_CSS"});

  printElem($m, $f, "
</head>
<body>
");

  printH($m, $f, 1, "Maltfilter v$progversion status report");
  my $period = get_period($settings{"STATS_MAX_AGE"});

  printP($m, $f,
  "Generated ".bb($m).get_time_str(time()).eb($m).". Data computed from ".
  ($reportmode ? "complete logfile scan" : "a period of last $period").".\n");

  printP($m, $f, "The hit classes marked as 'IPTABLES' are a pseudo-class meaning an\n".
  "filtered IP that was in Netfilter before Maltfilter was started.\n");

  if ($settings{"FILTER"} > 0) {
    printH($m, $f, 2, "Currently filtered entries");
    $period = get_period($settings{"FILTER_MAX_AGE"});
    printP($m, $f, "List of IPs that are currently filtered (or would be, if this is\n".
    "a report-only mode). Data from period of $period.\n");
    print_table1($m, $f, \%statlist, \%filterlist, \&cmp_hits, "filtered");
  }

  printH($m, $f, 2, "Summary of entries");
  printP($m, $f, "List of 'hits' of suspicious activity noticed by Maltfilter, but not\n".
  "necessarily acted upon. Sorted by descending IP address.\n");
  print_table2($m, $f, \%statlist, \%statlist, \&cmp_ips, "global");

  printH($m, $f, 2, "Ignored entries");
  printP($m, $f, "List of hits that were ignored (not acted upon), because the test was disabled.\n".
  "Notice that the entry may be blocked due to other checks, however.\n");
  print_table1($m, $f, \%ignorelist, \%ignorelist, \&cmp_hits, "ignored");

  printElem($m, $f, "</body>\n</html>\n");
  close(STATUS);
}


#############################################################################
### DroneBL submission support
#############################################################################
sub dronebl_process
{
  return unless ($settings{"DRONEBL"} > 0);

  # Create submission data
  my $xml = "<?xml version=\"1.0\"?>\n<request key=\"".$settings{"DRONEBL_RPC_KEY"}."\">\n";
  my $entries = 0;
  while (my ($ip, $entry) = each(%dronebl)) {
    if ($entry->{"sent"} == 0 && $entry->{"tries"} < 3) {
#      $xml .= "<add ip=\"".$ip."\" type=\"".$entry->{"type"}."\" />\n";
      $xml .= "<add ip=\"".$ip."\" type=\"1\" />\n";
      $entries++;
    }
  }
  $xml .= "</request>\n";

  # Bait out if no entries to submit
  return unless ($entries > 0);
  if ($settings{"DRY_RUN"}) {
    mlog(2, "[DroneBL] Would submit $entries entries.\n");
    return;
  } else {
    mlog(2, "[DroneBL] Trying to submit $entries entries.\n");
  }

  # Submit via HTTP XML-RPC
  my $tmp = LWP::UserAgent->new;
  $tmp->agent("Maltfilter/".$progversion);
  $tmp->timeout(10);
  my $req = HTTP::Request->new(POST => $settings{"DRONEBL_RPC_URI"});
  $req->content_type("text/xml");
  $req->content($xml);
  $req->user_agent("Maltfilter/".$progversion);
  my $res = $tmp->request($req);

  if ($res->is_success) {
    mlog(3, "[DroneBL] HTTP response [".$res->code."] ".$res->message."\n");
    my $str = $res->content;
    my ($type, $msg);
    $str =~ tr/\n/ /;

    if ($str =~ /<response\s*type=.(success|error).>(.*?)<\/response>/gm) {
      $type = $1; $msg = $2;
    } elsif ($str =~ /<response\s*type=.(success|error). *\/>/gm) {
      $type = $1; $msg = "";
    }
    
    if ($type eq "success") {
        mlog(1, "[DroneBL] Succesfully submitted $entries entries.\n$msg\n");
        while (my ($ip, $entry) = each(%dronebl)) {
          $entry->{"sent"} = 1;
          $statlist{$ip}{"dronebl"} = 2 if defined($statlist{$ip});
        }
    } elsif ($type eq "error") {
      # If we don't have a valid key, disable further submissions.
      if ($msg =~ /<code>403<\/code>/) {
        mlog(-1, "Disabling DroneBL submission due to invalid key.\n");
        $settings{"DRONEBL"} = 0;
      }
      # Log error message mangled
      $msg =~ s{\s*</?[^>]+>}{ }g;
      mlog(-1, "[DroneBL] Error in submission: $msg\n");
    } else {
      mlog(-1, "[DroneBL] Unsupported response message ".$str."\n");
    }
  } else {
    mlog(-1, "[DroneBL] HTTP request failed: [".$res->code."] ".$res->message."\n");
  }
  
  # Clean up expired entries, warn/note about unsubmitted ones.
  while (my ($ip, $entry) = each(%dronebl)) {
    if (!check_time3($entry->{"date"})) {
      mlog(1, "[DroneBL] $ip submission expired.\n") unless ($entry->{"sent"} > 0);
      delete($dronebl{$ip});
    }
  }
}

sub dronebl_queue($$$)
{
  my ($mip, $mdate, $mtype) = @_;

  return unless ($settings{"DRONEBL"} > 0);
  return if check_hosts_array(\@noaction_ips, $mip);

  if (!defined($dronebl{$mip})) {
    mlog(3, "[DroneBL] Queueing $mip \@ $mdate ($mtype)\n");
    $dronebl{$mip}{"type"} = $mtype;
    $dronebl{$mip}{"date"} = $mdate;
    $dronebl{$mip}{"sent"} = 0;
    $dronebl{$mip}{"tries"} = 0;
    $statlist{$mip}{"dronebl"} = 1 if defined($statlist{$mip});
  }
}

#############################################################################
### Evidence gathering
#############################################################################
my %evidence = ();

sub evidence_queue($$$)
{
  my ($mip, $mdata, $mfull) = @_;

  return unless ($settings{"EVIDENCE"} > 0);

  my $tmp = $mdata;
  $tmp =~ s/http:\/\///;
  $tmp =~ s/^\.+/_/;
  $tmp =~ s/[^A-Za-z0-9:\.]/_/g;

  $evidence{$mdata}{"coll"} = $tmp;
  $evidence{$mdata}{"hosts"}{$mip} = 1;
  $evidence{$mdata}{"full"}{$mfull} = 1;
}

sub evidence_fetch($$)
{
  my $tmp = LWP::UserAgent->new;
  $tmp->agent("-");
  $tmp->timeout(10);
  $tmp->default_headers->referer($_[1]);
  my $req = HTTP::Request->new(GET => $_[0]);
  return $tmp->request($req);
}

my $evidence_dir = 0;
sub evidence_gather
{
  my $dns = Net::DNS::Resolver->new;
  my $base = $settings{"EVIDENCE_DIR"};

  return unless ($settings{"EVIDENCE"} > 0);

  if (! -e $base) {
    mlog(-1, "Evidence directory '$base' has disappeared.\n") unless ($evidence_dir > 0);
    mdie("Evidence directory '$base' has been absent for $evidence_dir cycles, dying.\n") if ($evidence_dir++ > 10);
    return;
  } else {
    $evidence_dir = 0;
  }
  
  my $fetched = 0;
  foreach my $url (keys %evidence) {
    my $filename = $base."/".$evidence{$url}{"coll"}.".data";
    my $filename2 = $base."/".$evidence{$url}{"coll"}.".hosts";
    my $filename3 = $base."/".$evidence{$url}{"coll"}.".info";

    # Get data contents only once
    if (! -e $filename) {
      $fetched++;
      mlog(1, "Fetching evidence for $url\n");
      my $res = evidence_fetch($url, "");
      open(FILE, ">:raw", $filename) or mdie("Could not open '$filename' for writing.\n");
      binmode(FILE, ":raw");
      if ($res->is_success && $res->code >= 200 && $res->code <= 201) {
        print FILE $res->content;
      }
      close(FILE);

      open(FILE, ">:raw", $filename3) or mdie("Could not open '$filename3' for writing.\n");
      binmode(FILE, ":raw");
      print FILE "XSS URI           : $url\n";
      print FILE "Time of retrieval : ".get_time_str(time())."\n";
      print FILE "HTTP return code  : [".$res->code."] ".$res->message."\n";
      print FILE "Content-Type      : ".($res->content_type ? $res->content_type : "?")."\n";
      print FILE "Last modified     : ".($res->last_modified ? $res->last_modified : "?")."\n";
      print FILE "------ HTTP Headers ------\n".$res->headers_as_string."\n";
      print FILE "------ Requests ------\n";
      print FILE $_."\n" foreach (keys %{$evidence{$url}{"full"}});
      close(FILE);
    }

    # Check if we are appending hosts to existing data
    if (-e $filename2) {
      open(FILE, "<", $filename2) or mdie("Could not open '$filename2' for reading.\n");
      while (<FILE>) {
        if (/^(\d+\.\d+\.\d+\.\d+) *\|/) {
          if (defined($evidence{$url}{"hosts"}{$1})) {
            delete($evidence{$url}{"hosts"}{$1});
          }
        }
      }
      close(FILE);
      open(FILE, ">>", $filename2) or mdie("Could not open '$filename2' for appending.\n");
    } else {
      open(FILE, ">", $filename2) or mdie("Could not open '$filename2' for writing.\n");
    }
    foreach my $host (sort keys %{$evidence{$url}{"hosts"}}) {
      my $query = $dns->search($host);
      my @names = ();
      undef(@names);
      if ($query) {
        foreach my $rr ($query->answer) {
          push(@names, $rr->{"ptrdname"}) if defined($rr->{"ptrdname"});
        }
      }
      printf FILE "%-15s | %s\n", $host, join(" | ", @names);
    }
    close(FILE);

    # This entry has been handled, delete it
    delete($evidence{$url});

    # If not in report mode, handle only 5 fetched entries at time
    return unless ($reportmode || $fetched < 5);
  }
}


#############################################################################
### Entry management / handling functions
#############################################################################
### Check if given IP or host exists in array
sub check_hosts_array($$)
{
  my $chk_host = $_[1];
  my $chk_ip = new Net::IP($chk_host);
  foreach my $host (@{$_[0]}) {
    my $ip = new Net::IP($host);
    if (defined($chk_ip) && defined($ip)) {
      my $res = $chk_ip->overlaps($ip);
      if (defined($res)) {
        return 1 if ($res == $IP_IDENTICAL);
        return 2 if ($res == $IP_B_IN_A_OVERLAP);
        return 3 if ($res == $IP_A_IN_B_OVERLAP);
      }
    }
    return 4 if ($chk_host eq $host);
  }
  return 0;
}

### Check IP/host against | separated list of IPs/hosts
sub check_hosts($$)
{
  my @tmp = split(/\s*\|\s*/, $_[0]);
  return check_hosts_array(\@tmp, $_[1]);
}

### Execute iptables
sub exec_iptables(@)
{
  $ENV{"PATH"} = "";
  my @args = ($settings{"IPTABLES"}, @_);
  if ($settings{"DRY_RUN"}) {
    mlog(3, ":: ".join(" ", @args)."\n");
  } else {
    system(@args) == 0 or print join(" ", @args)." failed: $?\n";
  }
}

### Get current Netfilter INPUT table entries that match
### entry types we manage, e.g. filterlist
sub update_filterlist($)
{
  my $first = $_[0];
  return unless ($settings{"FILTER"} > 0);

  $ENV{"PATH"} = "";
  open(STATUS, $settings{"IPTABLES"}." -v -n -L INPUT |") or
    mdie("Could not execute ".$settings{"IPTABLES"}."\n");
  my %newlist = ();
  undef(%newlist);
  while (<STATUS>) {
    chomp;
    if (/^\s*(\d+)\s+\d+\s+$settings{"FILTER_TARGET"}\s+all\s+--\s+\*\s+\*\s+(\d+\.\d+\.\d+\.\d+)\s+0\.0\.0\.0\/0\s*$/) {
      my $mip = $2;
      if (!defined($filterlist{$mip})) {
        mlog(2, "* $mip appeared in iptables.\n") unless ($first < 0);
        $filterlist{$2} = time();
      }
      $newlist{$2} = 1;
      update_entry(\%statlist, $mip, -1, "IPTABLES", "", 0);
    }
  }
  close(STATUS);
  
  foreach my $mip (keys %filterlist) {
    if (!defined($newlist{$mip})) {
      mlog(2, "* $mip removed from iptables.\n");
      delete($filterlist{$mip});
    }
  }
}

### Check if given timestamp is _newer_ than weedperiod threshold.
### Returns false if timestamp is over weed period, e.g. needs weeding.
sub check_time1($)
{
  return ($_[0] > time() - ($settings{"FILTER_MAX_AGE"} * 60 * 60));
}

sub check_time2($)
{
  return ($_[0] > time() - ($settings{"STATS_MAX_AGE"} * 60 * 60));
}

sub check_time3($)
{
  return ($_[0] > time() - ($settings{"DRONEBL_MAX_AGE"} * 60));
}

### Weed out old entries
sub weed_do($)
{
  my $mtime = $filterlist{$_[0]};
  mlog(2, "* Weeding $_[0] (".get_time_str($mtime).")\n");
  exec_iptables("-D", "INPUT", "-s", $_[0], "-d", "0.0.0.0/0", "-j", $settings{"FILTER_TARGET"});
  delete($filterlist{$_[0]});
  delete($statlist{$_[0]});
  delete($ignorelist{$_[0]});
}

sub weed_entries()
{
  # Don't weed in report mode.
  return unless ($settings{"FILTER"} > 0 && $reportmode == 0);

  # Weed blocked entries.
  my @mips = keys %filterlist;
  foreach my $mip (@mips) {
    if (defined($statlist{$mip})) {
      if ($statlist{$mip}{"date2"} >= 0) {
        weed_do($mip) unless check_time1($statlist{$mip}{"date2"});
      } else {
        weed_do($mip);
      }
    } elsif (defined($filterlist{$mip})) {
      weed_do($mip);
    }
  }
  
  # Clean up old entries from other lists
  foreach my $mip (keys %statlist) {
    if (defined($statlist{$mip})) {
      my $mtime = $statlist{$mip}{"date2"};
      if (!check_time2($mtime) && !defined($filterlist{$mip})) {
        mlog(3, "* Deleting stale $mip (".get_time_str($mtime).")\n");
        delete($statlist{$mip});
      }
    }
  }

  foreach my $mip (keys %ignorelist) {
    if (defined($ignorelist{$mip})) {
      my $mtime = $ignorelist{$mip}{"date2"};
      if (!check_time2($mtime)) {
        mlog(3, "* Deleting stale ignored $mip (".get_time_str($mtime).")\n");
        delete($ignorelist{$mip});
      }
    }
  }
}

### Update one entry data
sub update_date($$)
{
  if (!defined($_[0]->{"date1"}) || ($_[1] > 0 && $_[0]->{"date1"} < 0)) {
    $_[0]->{"date1"} = $_[1];
  }
  if (!defined($_[0]->{"date2"}) || $_[1] > $_[0]->{"date2"}) {
    $_[0]->{"date2"} = $_[1];
  }
}

sub update_entry($$$$$$)
{
  my ($struct, $mip, $mdate, $mclass, $mreason, $addhits) = @_;

  return if check_hosts_array(\@noaction_ips, $mip);

  $struct->{$mip} = {} unless defined($struct->{$mip});
  my $entry = $struct->{$mip};
  $entry->{"reason"}{$mclass} = {} unless defined($entry->{"reason"}{$mclass});
  my $reason = $entry->{"reason"}{$mclass};

  $entry->{"dronebl"} = 0 unless defined($entry->{"dronebl"});
  
  # Add hits only when requested  
  if ($addhits) {
    $entry->{"hits"}++;
    $reason->{"hits"}++;
  } else {
    $entry->{"hits"} = 1 unless defined($entry->{"hits"});
    $reason->{"hits"} = 1 unless defined($reason->{"hits"});
  }

  # Messages is an array in reportmode
  if ($reportmode) {
    push(@{$reason->{"msg"}}, $mreason);
  } else {
    $reason->{"msg"} = $mreason;
  }

  # Update timestamps (generic and reason)
  update_date($entry, $mdate);
  update_date($reason, $mdate);
  
  return $entry->{"hits"};
}

### Check if given "try count" exceeds threshold and if entry
### is NOT in Netfilter already, then add it if so.
sub check_add_hit($$$$$$)
{
  my $mip = $_[0];
  my $mdate = str2time($_[1]);
  my $mclass = $_[2];
  my $mreason = $_[3];
  my $mtype = $_[4];
  my $mcond = $_[5];
  my $cnt;

  if (check_hosts_array(\@noaction_ips, $mip)) {
    mlog(2, "Hit to NOACTION_IPS($mip): [$mclass] $mreason\n");
    return;
  }
  
  # If condition is true, we add to regular statlist  
  if ($mcond) {
    $cnt = update_entry(\%statlist, $mip, $mdate, $mclass, $mreason, 1);
  } else {
    # This is an ignored hit (for disabled test), add to ignorelist
    update_entry(\%ignorelist, $mip, $mdate, $mclass, $mreason, 1);
    return;
  }

  # Check if we have exceeded threshold etc.
  if ($settings{"FILTER"} > 0 && $cnt >= $settings{"FILTER_THRESHOLD"} && check_time1($mdate)) {
    # Add to filterlist, unless already there.
    if (!defined($filterlist{$mip})) {
      mlog(1, "* Adding $mip \@ ".get_time_str($mdate).": [$mclass] $mreason\n");
      exec_iptables("-I", "INPUT", "1", "-s", $mip, "-j", $settings{"FILTER_TARGET"});
    }
    # Update date of last hit
    $filterlist{$mip} = $mdate;
  }

  # Separate check for DroneBL
  if ($mtype > 0 && $cnt >= $settings{"DRONEBL_THRESHOLD"} && check_time3($mdate)) {
    dronebl_queue($mip, $mdate, $mtype);
  }
}


#############################################################################
### Main helper functions
#############################################################################
### Print log entry
sub mlog($$)
{
  my $level = shift;
  my $msg = shift;
  if ($LOGFILE) {
    print $LOGFILE "[".get_time_str(time())."] ".$msg if ($settings{"VERBOSITY"} > $level);
  } elsif ($settings{"DRY_RUN"}) {
    print STDERR $msg if ($settings{"VERBOSITY"} > $level);
  }
}

### Like Perl's die(), but also print a logfile entry.
sub mdie($)
{
  mlog(-1, $_[0]) if ($LOGFILE);
  die($_[0]);
}

### Initialize
sub malt_init
{
  %statlist = ();
  undef(%statlist);
  %ignorelist = ();
  undef(%ignorelist);
  update_filterlist(-1);

  foreach my $filename (@scanfiles_once) {
    mlog(0, "Parsing [ONCE] ".$filename." ...\n");
    if (open(INFILE, "<", $filename)) {
      while (<INFILE>) {
        chomp;
        check_log_line($_);
      }
    } else {
      mlog(-1, "Could not open '".$filename."', skipping now.\n");
    }
    close(INFILE);
  }

  foreach my $filename (@scanfiles) {
    local *INFILE;
    mlog(0, "Initial parsing ".$filename." ...\n");
    if (open(INFILE, "<", $filename)) {
      $filehandles{$filename} = *INFILE;
      while (<INFILE>) {
        chomp;
        check_log_line($_);
      }
    } else {
      mlog(-1, "Could not open '".$filename."', skipping now.\n");
    }
  }
}

### Quick cleanup (not complete shutdown)
sub malt_cleanup
{
  foreach my $filename (keys %filehandles) {
    close($filehandles{$filename});
  }
}

sub malt_finish
{
  # Unlink pid-file
  if ($pid_file ne "" && -e $pid_file) {
    unlink $pid_file;
  }
  # Close logfile
  close($LOGFILE) if (defined($LOGFILE));
  undef($LOGFILE);
}

### Signal handlers
sub malt_int
{
  mlog(-1, "\nCaught Interrupt (^C), aborting.\n");
  malt_cleanup();
  malt_finish();
  exit(1);
}

sub malt_term
{
  mlog(-1, "Received TERM, quitting.\n");
  malt_cleanup();
  malt_finish();
  exit(1);
}

sub malt_hup
{
  mlog(-1, "Received HUP, reinitializing.\n");
  malt_cleanup();
  malt_configure();
  malt_init();
  mlog(-1, "Reinitialization finished, resuming scanning.\n");
}

sub malt_maintenance
{
  update_filterlist(time());
  weed_entries();
  generate_status($settings{"STATUS_FILE_PLAIN"}, 0);
  generate_status($settings{"STATUS_FILE_HTML"}, 1);
  evidence_gather();
  dronebl_process();
}

### Main scanning function
sub malt_scan
{
  mlog(1, "Entering main scanning loop.\n");
  my $counter = -1;
  while (1) {
    my %filepos = ();
    foreach my $filename (keys %filehandles) {
      for ($filepos{$filename} = tell($filehandles{$filename});
        $_ = readline($filehandles{$filename});
        $filepos{$filename} = tell($filehandles{$filename})) {
        chomp($_);
        check_log_line($_);
      }
    }
    sleep(1);
    foreach my $filename (keys %filehandles) {
      seek($filehandles{$filename}, $filepos{$filename}, 0);
    }
    if ($counter < 0 || $counter++ >= 30) {
      # Every once in a while, execute maintenance functions
      $counter = 0;
      malt_maintenance();
    }
  }
}

### Read one configuration file
sub malt_read_config($)
{
  my $filename = $_[0];
  my $errors = 0;
  my $line = 0;

  open(CONFFILE, "<", $filename) or mdie("Could not open configuration '".$filename."'!\n");
  while (<CONFFILE>) {
    $line++;
    chomp;
    if (/(^\s*#|^\s*$)/) {
      # Ignore comments and empty lines
    } elsif (/^\s*\"?([a-zA-Z0-9_]+)\"?\s*=>?\s*(\d+),?\s*$/) {
      my $key = uc($1);
      my $value = $2;
      if (defined($settings{$key})) {
        $settings{$key} = $value;
      } else {
        mlog(-1, "[$filename:$line] Unknown setting '$key' = $value\n");
        $errors = 1;
      }
    } elsif (/^\s*\"?([a-zA-Z0-9_]+)\"?\s*=>?\s*\"(.*?)\",?\s*$/) {
      my $key = uc($1);
      my $value = $2;
      if ($key eq "SCANFILE") {
        push(@scanfiles, $value);
      } elsif ($key eq "SCANFILE_ONCE") {
        push(@scanfiles_once, $value);
      } elsif ($key eq "NOACTION_IPS") {
        push(@noaction_ips, $value);
      } elsif (defined($settings{$key})) {
        $settings{$key} = $value;
      } else {
        mlog(-1, "[$filename:$line] Unknown setting '$key' = '$value'\n");
        $errors = 1;
      }
      # Force dry run mode if we are reporting only
      if ($reportmode) {
        $settings{"DRY_RUN"} = 1;
      }
    } else {
      mlog(-1, "[$filename:$line] Syntax error: $_\n");
      $errors = 1;
    }
  }
  close(CONFFILE);
  return $errors;
}

### Read all configuration files
sub malt_configure
{
  # Let user define his/her own logfiles to scan
  @scanfiles = ();
  undef(@scanfiles);

  @scanfiles_once = ();
  undef(@scanfiles_once);
  
  @noaction_ips = ();
  undef(@noaction_ips);

  foreach my $filename (@configfiles) {
    mdie("Errors in configuration file '$filename', bailing out.\n")
      unless (malt_read_config($filename) == 0);
  }

  # Clean up certain arrays duplicate entries
  my %saw = ();
  @scanfiles = grep(!$saw{$_}++, @scanfiles);

  %saw = ();
  @scanfiles_once = grep(!$saw{$_}++, @scanfiles_once);

  %saw = ();
  push(@noaction_ips, @noaction_ips_def);
  @noaction_ips = grep(!$saw{$_}++, @noaction_ips);
  undef(%saw);

  mlog(-1, "Not acting on IPs: ".join(", ", @noaction_ips)."\n");

  # Check if we have anything to do
  if ($reportmode) {
    mdie("Nothing to do, no SCANFILE(s) or SCANFILE_ONCE(s) defined in configuration.\n") unless ($#scanfiles > 0 || $#scanfiles_once > 0);
  } else {
    mdie("Nothing to do, no SCANFILE(s) defined in configuration.\n") unless ($#scanfiles > 0);
  }

  # General settings
  my $val = $settings{"STATS_MAX_AGE"};
  mdie("Invalid STATS_MAX_AGE value $val, must be > 0.\n") unless ($val > 0);

  # Filtering
  if ($settings{"FILTER"} > 0) {
    $val = $settings{"FILTER_MAX_AGE"};
    mdie("Invalid FILTER_MAX_AGE value $val, must be > 0.\n") unless ($val > 0);
    
    $val = $settings{"FILTER_THRESHOLD"};
    mdie("Invalid FILTER_THRESHOLD value $val, must be >= 0.\n") unless ($val >= 0);
  
    $val = $settings{"IPTABLES"};
    mdie("iptables binary does not exist or is not executable: $val\n") unless (-e $val && -x $val);
  } else {
    mlog(1, "Netfilter handling disabled.\n");
  }

  # Check evidence settings
  if ($settings{"EVIDENCE"} > 0) {
    my $base = $settings{"EVIDENCE_DIR"};
    mdie("Evidence directory (EVIDENCE_DIR) not set in configuration.\n") if ($base eq "");
    mdie("Evidence directory '$base' does not exist.\n") unless (-e $base);
    mdie("Path '$base' is not a directory.\n") unless (-d $base);
    mdie("Evidence directory '$base' is not writable by euid.\n") unless (-w $base);
  }
  
  # Sanitize DroneBL configuration
  if ($settings{"DRONEBL"} > 0) {
    mdie("DroneBL RPC key not set.\n") unless ($settings{"DRONEBL_RPC_KEY"} ne "");
  }
  
  # Check system account / passwd settings
  mdie("SYSACCT_MIN_UID must be >= 1.\n") unless ($settings{"SYSACCT_MIN_UID"} >= 1);
  mdie("SYSACCT_MAX_UID must be >= SYSACCT_MIN_UID.\n") unless ($settings{"SYSACCT_MAX_UID"} >= $settings{"SYSACCT_MIN_UID"});

  open(PASSWD, "<", $settings{"PASSWD"}) or mdie("Could not open '".$settings{"PASSWD"}."' for reading!\n");
  while (<PASSWD>) {
    my @fields = split(/\s*:\s*/);
    if ($fields[2] >= $settings{"SYSACCT_MIN_UID"} && $fields[2] <= $settings{"SYSACCT_MAX_UID"}) {
      $systemacct{$fields[0]} = $fields[2];
    }
  }
  close(PASSWD);
}

#############################################################################
###
### Main program
###
#############################################################################
# Setup signal handlers
$SIG{'INT'} = 'malt_int';
$SIG{'TERM'} = 'malt_term';
$SIG{'HUP'} = 'malt_hup';

# Print banner and help if no arguments
my $argc = $#ARGV + 1;
if ($argc < 1) {
  print STDERR $progbanner.
  "\n".
  "Usage: maltfilter <pid filename> [config filename] [config filename...]\n".
  "       maltfilter -f [config filename] [config filename...]\n".
  "-f turns on the full report mode.\n";
  exit;
}

# Test pid file existence unless report mode
$pid_file = shift;
if ($pid_file eq "-f") {
  $reportmode = 1;
  print STDERR $progbanner;
} else {
  mdie("'$pid_file' already exists, not starting.\n".
  "If the daemon is NOT running, remove the pid-file and re-start.\n")
  if (-e $pid_file);
}

# Read configuration files
while (defined(my $filename = shift)) {
  push(@configfiles, $filename);
}

malt_configure();

# Open logfile
if ($settings{"DRY_RUN"}) {
  print STDERR 
  "*********************************\n".
  "* NOTICE! DRY-RUN MODE ENABLED! *\n".
  "*********************************\n";
} elsif ($settings{"LOGFILE"} ne "") {
  open($LOGFILE, ">>", $settings{"LOGFILE"}) or die("Could not open logfile '".$settings{"LOGFILE"}."' for writing!\n");
  select((select($LOGFILE), $| = 1)[0]);
  mlog(-1, "Log started\n");
}

# Initialize
malt_init();

# Fork to background, unless dry-running
if ($settings{"DRY_RUN"}) {
  if ($reportmode) {
    malt_maintenance();
    malt_cleanup();
  } else {
    malt_scan();
    malt_cleanup();
  }
} else {
  if (my $pid = fork) {
    open(PIDFILE, ">", $pid_file) or mdie("Could not open pid file '".$pid_file."' for writing!\n");
    print PIDFILE "$pid\n";
    close(PIDFILE);
  } else {
    malt_scan();
    malt_cleanup();
  }
}