#!/usr/bin/perl
#
# fetchem.pl         - SHADOW Version 1.6
#                      Last changed 29 Sep 1999
#
#
#  Script to fetch tcpdump gzipped hourly logfiles from a sensor,
#  move them to a dated subdirectory on the analyzer, run through
#  a filter looking for suspicious events, and position that suspicious
#  events text file on a web page.
#  
#  Written by Bill Ralph 3/12/98
#
# Set up some variables.
#
use Getopt::Long;
use POSIX qw(strftime);
use Time::Local;
#
# Installation directory of the SHADOW scripts; it is also the last entry
# of the command search path set up below.
$SHADOW_PATH = '/usr/local/logger';
# Fixed search path for the external commands this script runs.
$ENV{PATH} = join(':',
   qw(/bin /usr/bin /usr/sbin /usr/local/bin /usr/local/sbin),
   $SHADOW_PATH);
# Defaults: time zone tag of the sniff date, and no date chosen yet.
$tz = 'LOC';
$snifdate = '';
#
# Print the command-line synopsis on the current output handle and
# terminate the script with exit status 2. Takes no arguments.
sub usage {
   my $synopsis = "Usage: fetchem -l Site {-debug} {-d YYYYMMDDHH}.\n";
   print $synopsis;
   exit 2;
}
#
#	Parse the parameters.
#
#
# Options: -debug (flag), -d [date] (optional string), -l Site (required
# string). An option GetOptions cannot parse is treated as a usage error.
GetOptions("debug", \$debug, "d:s", \$snifdate, "l=s", \$Site) or usage();
#
#  Check parameter validity: a site name is mandatory, and a date given on
#  the command line must consist of exactly 8 (YYMMDDHH) or 10 (YYYYMMDDHH)
#  digits. An empty date means "ask the sensor".
#
$sdlen = length($snifdate);
if ((!defined($Site)) or ($Site eq "")
    or (($sdlen > 0) and ($snifdate !~ /\A(?:\d{8}|\d{10})\z/)))
{
   usage();
}
# unpack() template matching the date length; the 4-digit-year form is the
# default when no date was supplied.
$yr_format = ($sdlen == 8) ? "a2a2a2a2" : "a4a2a2a2";
##
#
#  Rid ourselves of standard output and error to prevent tons of mail messages
#  from cron.
#
# Derive the log name from the script's own name: the last path component
# of $0, cut at its first "." (".../fetchem.pl" gives "fetchem").
@lvls = split(/\//,$0);				# Split path
($call, $suf) = split(/\./, $lvls[$#lvls]);	# Split call.suffix
# With -debug the output is appended to /tmp/<call>.log, otherwise discarded.
$LOG_FILE = $debug ? "/tmp/$call.log" : "/dev/null";
# Stop here if the log cannot be opened rather than carrying on with a
# closed STDOUT; the 3-argument form keeps the name from being read as a mode.
open(STDOUT, ">>", $LOG_FILE) or die "Unable to open $LOG_FILE: $!";
select STDOUT; $| = 1;              # make unbuffered
open(STDERR, ">&STDOUT") or die "Unable to redirect STDERR to $LOG_FILE: $!";
# STDERR is deliberately left as the selected default output handle.
select STDERR; $| = 1;              # make unbuffered

#
# Write a marker to the log file.
#
# Start a log line with the current local time in the locale's "%c" format.
$marker = strftime("%c", localtime(time));
print STDOUT $marker;
# Add the script name and each remaining argument, every one preceded by a
# single space. No handle is named, so this goes to the selected handle.
print map { " $_" } ($0, @ARGV);
print STDOUT "\n";
#
# Once the Site is identified from the command line,
# load the needed external Parameters.
#
# Look for site parameter files in $SHADOW_PATH/sites before anywhere else,
# then load "<Site>.ph"; require raises a fatal error if loading fails.
unshift @INC, $SHADOW_PATH . "/sites";
require($Site . ".ph");

#
#
# If we have been called with a SNIFDATE parameter, set up the $subdir variable,
# else download the SNIFDATE from the sensor.
#
if ($snifdate eq "") {
   $snif_file="/tmp/" . $SITE . "_sensor.date";

   print STDOUT "SNIF file = $snif_file \n";

   # Remove any copy left by an earlier run before fetching a fresh one.
   if ( -f $snif_file) {
      unlink $snif_file;
   }
#
# Fetch current hour identity file from sensor. Try both 4 and 2 digit
# date formats. Remember this one is the "CURRENT" hour. A date on the
# call line is the desired hour.
#
   $cmd4 = "scp -q root\@${SENSOR}:$SENSOR_DIR/sensor.date $snif_file";
   $cmd2 = "scp -q root\@${SENSOR}:$SENSOR_DIR/snif.date $snif_file";

   # Whichever copy succeeds also decides the unpack() template for the date.
   if (system($cmd4) == 0) {
      print STDOUT "$cmd4\n";
      $yr_format = "a4a2a2a2";
   } elsif (system($cmd2) == 0) {
      print STDOUT "$cmd2\n";
      $yr_format = "a2a2a2a2";
   } else {
      die("Unable to connect to $SENSOR.");
   }
#
# Read the fetched file in-process (no shell, no "cat"). The whole file is
# slurped so that a zone tag on a following line is still picked up.
#
   open(SNIFDATE, "<", $snif_file)
      or die("Unable to read $snif_file: $!");
   {
      local $/;
      $snifline = <SNIFDATE>;
   }
   close(SNIFDATE);
   unlink $snif_file;
   $snifline = "" unless defined($snifline);
   chomp($snifline);
   # First field is the date, the optional second field the time zone tag.
   ($snifdate, $tz) = split(/\s+/, $snifline);
   # Keep the local-time default when the sensor file carries no zone tag.
   $tz = "LOC" unless defined($tz);
   # The date must start with as many digits as the template will unpack;
   # anything else would only turn into garbage date fields further on.
   {
      my $date_re = ($yr_format eq "a4a2a2a2") ? qr/\A\d{10}/ : qr/\A\d{8}/;
      unless (defined($snifdate) and $snifdate =~ $date_re) {
         die("Unusable date \"$snifline\" received from $SENSOR.");
      }
   }
}
#
#  Unpack the "snifdate" into its useful components.
#
# Split the date into year, month, day and hour with the template chosen
# earlier, and shift the month to the 0..11 range used by Time::Local.
($year, $mon, $mday, $hour_only) = unpack($yr_format, $snifdate);
$mon -= 1;
if (!($yr_format =~ /4/)) {
#
# Two-digit year: apply a "sliding window" with no gap. Years above 90 are
# taken as 19xx, every other value as 20xx. The year is then handed on in
# full so that Time::Local does not have to guess the century itself.
#
   if ($year > 90) {
      $year += 1900;
      $snifdate = "19" . $snifdate;
   } else {
      $year += 2000;
      $snifdate = "20" . $snifdate;
   }
}
#
# Convert our snif date back to time format.
#
# Turn the date fields into epoch seconds at minute 0, second 0 of the hour.
# A zone tag of "GMT" means the fields are UTC; any other tag means they are
# in the local time zone of this process.
$snif_time = ($tz eq "GMT")
           ? timegm(0, 0, $hour_only, $mday, $mon, $year)
           : timelocal(0, 0, $hour_only, $mday, $mon, $year);
#
# $sdlen is 0 when no date came from the command line, i.e. the date was
# read from the sensor and names the hour still in progress. Step back 3599
# seconds so the time falls inside the previous hour.
#
$snif_time -= 3599 if ($sdlen == 0);
#
# Let's try to keep track of some times. I'm easily confused, so if our
# sensor is in a different time zone, we need to relate it to GMT cause
# that seems like the thing to do.
#
# Break the sniff time down twice: once as GMT and once in the local time
# zone of the analyzer this script runs on.
@gmt_snif_date = gmtime($snif_time);
@loc_snif_date = localtime($snif_time);
#
# Zone name for the web page: tzname() returns the standard and the
# daylight-saving name, and element 8 of localtime() (the DST flag) picks
# between the two.
#
POSIX::tzset();
($tz_name, $tz_dst) = POSIX::tzname();
$TZ = $loc_snif_date[8] ? $tz_dst : $tz_name;
#
# Difference between the GMT hour and the local hour, folded into 0..23.
# When the two hours agree the GMT breakdown is used, otherwise the local one.
#
$tz_diff = ($gmt_snif_date[2] - $loc_snif_date[2] + 24) % 24;
@snif_date = ($tz_diff == 0) ? @gmt_snif_date : @loc_snif_date;
#
# Format the chosen time once per pattern: 2-digit-year stamp, 4-digit-year
# stamp, bare hour, and the "MonDD" directory name.
($snif2date, $snif4date, $hour_only, $subdir) =
   map { strftime($_, @snif_date) } ("%y%m%d%H", "%Y%m%d%H", "%H", "%b%d");
# From here on the working date is always the 4-digit-year stamp.
$snifdate = $snif4date;
$hour = "${hour_only}:00";
#
print STDOUT "snif2date = $snif2date, dir = $subdir, hour = $hour\n",
             "snif4date = $snif4date, dir = $subdir, hour = $hour\n";

#
# Predict the previous hour and next hour for html links.
#
{
   # Local-time breakdown of the hour before and the hour after.
   my @before = localtime($snif_time - 3600);
   my @after  = localtime($snif_time + 3600);

   # Date stamps with 2- and 4-digit years; the 4-digit one names the page.
   ($last2hour, $last4hour) =
      map { strftime($_, @before) } ("%y%m%d%H", "%Y%m%d%H");
   ($next2hour, $next4hour) =
      map { strftime($_, @after) } ("%y%m%d%H", "%Y%m%d%H");
   $last_hour = $last4hour;
   $next_hour = $next4hour;

   # Sibling "MonDD" directories, written relative to the current page.
   $last_hour_dir = "../" . strftime("%b%d", @before);
   $next_hour_dir = "../" . strftime("%b%d", @after);
}

print STDOUT "Last HR. = $last_hour, Next HR. = $next_hour\n";
print STDOUT "Last HR/dir = $last_hour_dir, Next HR/dir = $next_hour_dir\n";


#
#  Make sure subdirectory "MONXX" exists under $ANALYZER_DIR and
#  on web page
#
# Create the dated subdirectory under both the analyzer data tree and the
# web tree if it is missing. mkdir reports its reason in $! (not $?), so
# that is what both failure messages show.
foreach $needed_dir ("$ANALYZER_DIR/$subdir", "${OUTPUT_WEB_DIR}/$subdir") {
   next if ( -d $needed_dir);
   mkdir($needed_dir, 0755)
      or die "Unable to mkdir $needed_dir: $!";
}
#
# Prepare to copy down the raw gzipped tcpdump data file.
#
# Remote side: login prefix for scp and the two possible file names
# (2-digit and 4-digit year) in the sensor directory.
$src_prefix = "root\@${SENSOR}:";
$src_dir = "$SENSOR_DIR";
($src2file, $src4file) =
   map { "$src_dir/tcp.$_.gz" } ($snif2date, $snif4date);
# Local side: the same two names inside the dated analyzer subdirectory.
$dst_dir = join("/", $ANALYZER_DIR, $subdir);
($dst2file, $dst4file) =
   map { "$dst_dir/tcp.$_.gz" } ($snif2date, $snif4date);
# Work on an existing 2-digit-year file if there is one, otherwise on the
# 4-digit-year name.
$zipped_file = (-f $dst2file) ? $dst2file : $dst4file;
#
# If our raw file is already on the analyzer, don't re-fetch it.
#
unless ( (-f $dst2file ) || (-f $dst4file)) {
#
# Ask the sensor which naming style it used: "ls" the 4-digit-year name
# first, then the 2-digit one. A name counts as present unless the first
# line printed by the remote command contains "No such file".
#
   $rmt2cmd = "/usr/local/bin/ssh -l root ${SENSOR} ls $src2file 2>&1";
   $rmt4cmd = "/usr/local/bin/ssh -l root ${SENSOR} ls $src4file 2>&1";
   $src_file = "";
   foreach $probe ([$rmt4cmd, $src4file], [$rmt2cmd, $src2file]) {
      my ($probe_cmd, $candidate) = @$probe;
      open(REMOTE, "$probe_cmd|")
         or die("Unable to run ssh to query $SENSOR: $!");
      $result = <REMOTE>;
      close(REMOTE);
      # No output at all reads back as undef; handle it as an empty reply
      # instead of matching a pattern against an undefined value.
      $result = "" unless defined($result);
      if ($result !~ /No such file/) {
         $src_file = $candidate;
         last;
      }
   }
   die("Unable to locate RAW data file on sensor.") if ($src_file eq "");
   print STDOUT ("scp -q ${src_prefix}${src_file} $zipped_file\n");
#
# It's not here, we know its date style, fetch it from the sensor.
#
   system("scp -q ${src_prefix}${src_file} $zipped_file") == 0 or
      die("Unable to copy zipped Data file from $SENSOR.");
}
#
#  Call tcpdump to read the newly fetched file, scan it with our bad guys
#  filters, and write the results to the web page.
#
# The filter files are handed to tcpdump by bare name later on, so the
# working directory really has to be $FILTER_DIR; stop if it cannot be entered.
chdir($FILTER_DIR) or die("Unable to chdir to $FILTER_DIR: $!");
@filters = glob("*.filter");

print STDOUT "Filter names: @filters\n";

# Text and HTML output for this hour, plus the relative links to the pages
# of the previous and the next hour.
$output_txt_file="$OUTPUT_WEB_DIR/$subdir/$snifdate.txt";
$output_html_file="$OUTPUT_WEB_DIR/$subdir/$snifdate.html";
$prev_out_file="$last_hour_dir/$last_hour.html";
$next_out_file="$next_hour_dir/$next_hour.html";

print STDOUT "zip file = $zipped_file\n";
print STDOUT "output(txt) = $output_txt_file\nprev = $prev_out_file\n";
print STDOUT "output(html) = $output_html_file\nnext = $next_out_file\n";

# Start the HTML page afresh (truncating any earlier version) with the
# title, a heading and the opening <PRE>; the body is appended later.
open(OUTPUT, ">", $output_html_file)
   or die "Can't open $output_html_file: $!";
print OUTPUT <<"EOF";
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Final//EN">
<HTML>
<HEAD>
<TITLE>Hourly tcpdump for $SITE on $subdir at $hour $TZ.</TITLE>
</HEAD>
<BODY BGCOLOR='#FFFFE1'>
<H3>Site: $SITE - Date: $subdir - $TZ: $hour.</H3>
<PRE>
EOF
# Buffered write errors only show up at close time.
close(OUTPUT) or die "Can't write $output_html_file: $!";
#
# Create the text output file so each tcpdump iteration appends to it.
#
if (not -e $output_txt_file) {
   # Create the file empty; each command below appends to it through the
   # shell redirection at the end of the pipeline.
   open(OUT_TEXT, ">", $output_txt_file)
      or die("Unable to create $output_txt_file: $!");
   close(OUT_TEXT);
#
#  Loop through site filters, call tcpdump and save the output text file.
#
   for $fil (@filters) {
      $cmd = "gunzip -c $zipped_file"
              . " | "
              . "tcpdump -S -n -r - -F $fil >> $output_txt_file";

      print STDOUT ("$cmd\n");

      # The pipeline writes its own output file, so there is nothing to read
      # back from it: just run it and wait. A return of -1 means it could
      # not be started; any other non-zero status is only logged.
      my $status = system($cmd);
      die("Unable to spawn tcpdump command.") if ($status == -1);
      if ($status != 0) {
         print STDOUT ("Filter $fil: pipeline returned wait status $status.\n");
      }
   }
}

#
# Clean  some tcpdump errors from the file before passing it along.
#
print STDOUT "Cleaning ${output_txt_file}.\n";

# Open the input first so that a missing text file does not leave an empty
# ".cleaned" file behind.
open(TEXTFILE, "<", $output_txt_file)
        or die "Can't open ${output_txt_file}: $!";
open(CLEAN, ">", "${output_txt_file}.cleaned")
        or die "Can't open ${output_txt_file}.cleaned: $!";
while (<TEXTFILE>) {
   # Drop every line that mentions "gre-proto" or "trunc".
   next if /gre-proto/;
   next if /trunc/;
   print CLEAN $_;
}
close(TEXTFILE);
close(CLEAN) or die "Can't finish writing ${output_txt_file}.cleaned: $!";
#
# Put the cleaned copy in place of the original txt file. rename() replaces
# an existing destination itself, so no separate unlink is needed and the
# text file never goes missing in between.
#
rename("${output_txt_file}.cleaned", "${output_txt_file}")
   or die "Can't rename ${output_txt_file}.cleaned to ${output_txt_file}: $!";

print STDOUT "Text file cleaned of tcpdump exceptions.\n";
#
# Call script sort_and_resolve to sort the output file by IP address
# and resolve the DNS names.
#
print STDOUT "Calling sort_and_resolve.pl\n";

# Run the helper without going through a shell and note a non-zero status in
# the log. The open of the ".sorted" file below remains the hard stop when
# the helper produced nothing.
if (system("perl", "$SHADOW_PATH/sort_and_resolve.pl", $output_txt_file) != 0) {
   print STDOUT "sort_and_resolve.pl returned wait status $?.\n";
}
#
# Append the sorted text to the html file, followed by a rule.
#
print STDOUT "Copying ${output_txt_file}.sorted to $output_html_file.\n";

open(OUTPUT, ">>", $output_html_file) or
  die "Can't open $output_html_file: $!";
open(TEXTFILE, "<", "${output_txt_file}.sorted") or
  die "Can't open ${output_txt_file}.sorted: $!";
while (<TEXTFILE>) {
   print OUTPUT $_;
}
close(TEXTFILE);
# OUTPUT is left open on purpose: more of the page is appended further down.
print OUTPUT <<"EOF";
<HR SIZE=3>
EOF
#
# Run script to track those source IP addresses that hit multiple destination
# addresses (greater than a threshold).
#
print STDOUT "Calling $SHADOW_PATH/find_scan.pl $SITE $zipped_file $$.\n";

# Run the scan detector without a shell; it is given the site name, the raw
# data file and this process id. A non-zero status is noted in the log, and
# the open of the results file below is the hard stop if nothing was written.
if (system("perl", "$SHADOW_PATH/find_scan.pl", $SITE, $zipped_file, $$) != 0) {
   print STDOUT "find_scan.pl returned wait status $?.\n";
}

print STDOUT "Copying results to html file\n";

# The results are read from /tmp/<SITE>_<pid>.results and appended to the
# page through the already open OUTPUT handle.
open(RESULTS, "<", "/tmp/${SITE}_$$.results")
   or die "Can't open /tmp/${SITE}_$$.results: $!";
while (<RESULTS>) {
   print OUTPUT $_;
}
close(RESULTS);
# Close the <PRE> section, repeat the heading and add the navigation bar
# linking to the previous hour, the index and the next hour.
print OUTPUT <<"EOF";
</PRE>
<H3>Site: $SITE - Date: $subdir - $TZ: $hour.</H3>
<HR>
<TABLE CELLSPACING="0" CELLPADDING="0">
<TR>
<TD ALIGN=CENTER VALIGN=MIDDLE><A HREF="$prev_out_file"><IMG SRC="/images/navbars/2/1.jpg" WIDTH="90" HEIGHT="20" BORDER="0" HSPACE="0" VSPACE="0"></A><IMG SRC="/images/navbars/2/2.jpg" WIDTH="110" HEIGHT="20" HSPACE="0" VSPACE="0"><A HREF="/tcpdump_results/index.html" TARGET="_top"><IMG SRC="/images/navbars/2/3.jpg" WIDTH="50" HEIGHT="20" BORDER="0" HSPACE="0" VSPACE="0"></A><IMG SRC="/images/navbars/2/2.jpg" WIDTH="110" HEIGHT="20" HSPACE="0" VSPACE="0"><A HREF="$next_out_file"><IMG SRC="/images/navbars/2/5.jpg" WIDTH="90" HEIGHT="20" BORDER="0" HSPACE="0" VSPACE="0"></A></TD></TR>
</TABLE>

</BODY>
</HTML>
EOF
# Buffered write errors only show up at close time.
close(OUTPUT) or die "Can't write $output_html_file: $!";
#
# For compatibility with the older 2-digit versions, add a 2 digit year
# link to the html file.
#
# The link name is relative, so it lands in whatever the current directory
# is. Only create it once we are really inside the page directory, and note
# any failure in the log; the link is a convenience, not worth aborting for.
if (chdir("$OUTPUT_WEB_DIR/$subdir/")) {
   # A link (or file) of that name left by an earlier run is kept as it is.
   unless ( -l "$snif2date.html" or -e "$snif2date.html") {
      symlink("$snifdate.html", "$snif2date.html")
         or print STDOUT "Unable to link $snif2date.html to $snifdate.html: $!\n";
   }
} else {
   print STDOUT "Unable to chdir to $OUTPUT_WEB_DIR/$subdir/: $!\n";
}
#
# Clean up temporary files and exit.
#
# Remove the intermediate files in order: the text file, its sorted copy
# and the scan results. The first one that cannot be removed ends the
# script with its own message.
foreach $leftover (
   [ "${output_txt_file}",        "Unable to remove ${output_txt_file}" ],
   [ "${output_txt_file}.sorted", "Unable to remove ${output_txt_file}.sorted." ],
   [ "/tmp/${SITE}_$$.results",   "Unable to remove /tmp/${SITE}_$$.results." ],
) {
   my ($path, $complaint) = @$leftover;
   unlink($path) or die($complaint);
}
print STDOUT "Fetchem completed.\n";
