#!/usr/bin/perl
package SHADOW;
#
# fetchem.pl         - SHADOW Version 1.7
#                      Last changed 31 May 2001
#
#
#  Script to fetch tcpdump gzipped hourly logfiles from a sensor,
#  move them to a dated subdirectory on the analyzer, run through
#  a filter looking for suspicious events, and position that suspicious
#  events text file on a web page.
#
#  Optionally uses Perl module Compress::Zlib to directly read gzipped files.
#  
#  Written by Bill Ralph  <RalphWD@nswc.navy.mil>
#
# Set up some variables.
#
use Getopt::Long;
use POSIX qw(strftime);
use Time::Local;
#
#########################################################################
#
# Subroutine to return a method reference. (From "Programming Perl",
# Third Edition, p. 261.)
#
sub get_method_ref {
   # Build a closure that invokes a named method on a fixed invocant.
   #   $object - the invocant the method will be called on
   #   $method - name of the method, looked up each time the closure runs
   # Returns a code reference. Whatever arguments the closure receives are
   # forwarded as-is (still aliased to the caller's variables), and the
   # method's return value is handed back unchanged.
   my ($object, $method) = @_;
   return sub { $object->$method(@_) };
}
##########################################################################
#
#
# Load the global SHADOW configuration by executing it as Perl code.
# The result of "do FILE" is tested in separate statements: a "||" written
# directly after the file name binds to the string constant, so a die()
# placed behind it can never run.
#
my $shadow_conf = "/usr/local/etc/SHADOW.conf";
my $conf_result = do $shadow_conf;
# $@ is set when the file was read but did not compile or died while running.
die("Error in SHADOW configuration file $shadow_conf: $@") if $@;
# An undefined result is only an error when the file really is unreadable;
# a configuration whose last statement yields undef is still accepted.
die("Unable to open SHADOW configuration file $shadow_conf.")
   unless (defined($conf_result) or -r $shadow_conf);
#
# Defaults used until the command line or the sensor says otherwise:
#   $tz        - time zone tag of the date stamp; anything but "GMT" is
#                converted as local time
#   $snifdate  - YYYYMMDDHH stamp; empty means "ask the sensor"
#   $yr_format - unpack template cutting the stamp into 4+2+2+2 characters
#
$tz = "LOC";
$snifdate = "";
$yr_format = "a4a2a2a2";
#
sub usage {
   # Print the command synopsis on the currently selected output handle
   # and terminate the script with exit status 2.
   my $synopsis = "Usage: fetchem -l Site {-debug} {-d YYYYMMDDHH}.\n";
   print $synopsis;
   exit(2);
}
#
#	Parse the parameters.
#
#
#
# Command line options:
#   -debug          flag; selects a real log file instead of /dev/null
#   -d YYYYMMDDHH   optional hour to process (value may be omitted)
#   -l Site         site name, required
#
GetOptions("debug", \$debug, "d:s", \$snifdate, "l=s", \$Site);
#
#  Check parameter validity. A site name is mandatory. When a date is
#  given it must be exactly ten digits: the value ends up in file names
#  and in commands handed to the shell, so anything else is rejected here
#  rather than only checking its length.
#
$sdlen = length($snifdate);
if (("$Site" eq "") or (($sdlen > 0) and ($snifdate !~ /\A\d{10}\z/)))
{
   usage();
}
##
#
#  Rid ourselves of standard output and error to prevent tons of mail messages
#  from cron.
#
#
# Derive the log name from the script name: take the last path component
# of $0 and keep the part before the first dot.
#
@lvls = split(/\//,$0);				# Split path
($call, $suf) = split(/\./, $lvls[$#lvls]);	# Split call.suffix
#
# With -debug everything is appended to /tmp/<call>.log, otherwise it is
# discarded. Both redirections are checked so that a log file that cannot
# be written is reported instead of silently losing all output.
#
$LOG_FILE = $debug ? "/tmp/$call.log" : "/dev/null";
open(STDOUT, ">>", $LOG_FILE)
   or die("Unable to redirect standard output to $LOG_FILE: $!");
select STDOUT; $| = 1;
unless (open(STDERR, ">&STDOUT")) {
   # STDERR is unusable at this point, so report through the log instead.
   print STDOUT "Unable to redirect standard error to $LOG_FILE: $!\n";
   exit 1;
}
# STDERR stays the selected handle from here on, so a print or printf
# without an explicit handle writes to STDERR.
select STDERR; $| = 1;              # make unbuffered

#
# Write a marker to the log file.
#
# One log line per run: the current local time followed by the script
# name and each command line argument, separated by single blanks.
$marker = strftime("%c", localtime(time));
print STDOUT $marker;
printf(" %s", $_) foreach ($0, @ARGV);   # goes to the selected handle
print STDOUT "\n";
#
# Once the Site is identified from the command line,
# load the needed external Parameters.
#
# Execute the per-site parameter file <SHADOW_SITE_PATH>/<Site>.ph and stop
# with a clear message when it is missing or broken; continuing would leave
# every site variable empty.
my $site_file = "$SHADOW_SITE_PATH/${Site}.ph";
my $site_result = do $site_file;
# $@ is set when the file was read but failed to compile or run.
die("Error in site parameter file $site_file: $@") if $@;
# An undefined result counts as failure only if the file is not readable,
# so a file whose last statement yields undef is still accepted.
die("Unable to read site parameter file $site_file: $!")
   unless (defined($site_result) or -r $site_file);
#
# If we have been called with a SNIFDATE parameter, set up the $subdir variable,
# else download the SNIFDATE from the sensor.
#
if ($snifdate eq "") {
   $snif_file="/tmp/" . $SITE . "_sensor.date";

   print STDOUT "SNIF file = $snif_file \n";

   # Remove a copy left over from an earlier run.
   unlink $snif_file if ( -f $snif_file);
#
# Fetch current hour identity file from sensor. Remember this one 
# is the "CURRENT" hour. A date on the call line is the desired hour.
#
   $snif_cmd = "$SCP_CMD -q root\@${SENSOR}:$SENSOR_DIR/sensor.date $snif_file";

   if (system($snif_cmd) == 0) {
      print STDOUT "$snif_cmd\n";
   } else {
      die("Unable to connect to $SENSOR.");
   }
#
# Read the whole file in Perl (no external "cat") and discard the copy.
#
   open(SNIF, "<$snif_file")
      or die("Unable to read $snif_file: $!");
   $snifline = do { local $/; <SNIF> };
   close(SNIF);
   unlink $snif_file;
   $snifline = "" unless defined $snifline;
   chomp($snifline);
#
# The file holds the date stamp, optionally followed by a time zone tag.
# Refuse anything that is not a ten digit stamp instead of computing
# times and file names from garbage.
#
   my $sensor_tz;
   ($snifdate, $sensor_tz) = split(/\s+/, $snifline);
   die("Sensor $SENSOR returned an invalid date stamp.")
      unless (defined($snifdate) and $snifdate =~ /\A\d{10}\z/);
   # Keep the existing default when the sensor supplies no zone tag.
   $tz = $sensor_tz if defined $sensor_tz;
}
#
#  Unpack the "snifdate" into its useful components.
#
# Cut the stamp into year, month, day and hour with the fixed-width template.
($year, $mon, $mday, $hour_only) = unpack($yr_format, $snifdate);
#
# timegm/timelocal take a zero-based month and a year counted from 1900.
#
$mon--;
$year -= 1900;
#
# Turn the fields into epoch seconds at minute 0, second 0 of that hour,
# read as GMT when the stamp is tagged "GMT" and as local time otherwise.
#
$snif_time = ($tz eq "GMT")
   ? timegm(0, 0, $hour_only, $mday, $mon, $year)
   : timelocal(0, 0, $hour_only, $mday, $mon, $year);
#
# A stamp that came from the sensor names the hour still being recorded,
# so step back 3599 seconds, which lands one second into the previous hour.
#
$snif_time -= 3599 if ($sdlen == 0);
#
# Let's try to keep track of some times. I'm easily confused, so if our
# sensor is in a different time zone, we need to relate it to GMT cause
# that seems like the thing to do.
#
#
# Break the chosen hour down twice: once as GMT and once in the time zone
# of the analyzer.
#
@gmt_snif_date = gmtime($snif_time);
@loc_snif_date = localtime($snif_time);
#
# Ask POSIX for the analyzer's standard and daylight zone names and pick
# the one selected by the daylight-saving flag of the local breakdown.
#
POSIX::tzset();
my @zone_names = POSIX::tzname();
($tz_name, $tz_dst) = @zone_names[0, 1];
$TZ = ($tz_name, $tz_dst)[$loc_snif_date[8]];
#
# Hour-of-day distance between GMT and local time, folded into 0..23.
#
$tz_diff = ($gmt_snif_date[2] - $loc_snif_date[2] + 24) % 24;
#
# Use the GMT breakdown only when both hours agree, else the local one.
#
@snif_date = ($tz_diff == 0) ? @gmt_snif_date : @loc_snif_date;
#
# Names derived from the selected breakdown:
#   $snifdate   - YYYYMMDDHH stamp         $hour   - "HH:00"
#   $subdir     - month abbreviation + day $output_dir - web directory
#
$snifdate = strftime("%Y%m%d%H", @snif_date);
$hour_only = strftime("%H", @snif_date);
$hour = "${hour_only}:00";
$subdir = strftime("%b%d", @snif_date);
$output_dir = "${OUTPUT_WEB_DIR}/$subdir";
#
print STDOUT "snifdate = $snifdate, dir = $output_dir, hour = $hour\n";

#
# Predict the previous hour and next hour for html links.
#
# Break down the hour before and the hour after once each, in local time,
# and format both the page stamp and the relative directory from them.
my @hour_before = localtime($snif_time - 3600);
my @hour_after  = localtime($snif_time + 3600);
#
$last_hour     = strftime("%Y%m%d%H", @hour_before);
$last_hour_dir = "../" . strftime("%b%d", @hour_before);
#
$next_hour     = strftime("%Y%m%d%H", @hour_after);
$next_hour_dir = "../" . strftime("%b%d", @hour_after);

print STDOUT "Last HR. = $last_hour, Next HR. = $next_hour\n";
print STDOUT "Last HR/dir = $last_hour_dir, Next HR/dir = $next_hour_dir\n";


#
#  Make sure subdirectory "MONXX" exists under $ANALYZER_DIR and
#  on web page
#
# Create the day's data directory and the day's web directory when they do
# not exist yet. A failed mkdir reports the operating system error in $!
# ($? only describes child processes and says nothing about mkdir).
foreach my $needed_dir ("$ANALYZER_DIR/$subdir", $output_dir) {
   next if ( -d $needed_dir);
   mkdir($needed_dir, 0755)
      or die "Unable to mkdir $needed_dir: $!";
}
#
# Prepare to copy down the raw gzipped tcpdump data file.
#
# Names of the gzipped tcpdump file on the sensor and of its copy on the
# analyzer.
$src_prefix = "root\@${SENSOR}:";
$src_dir = "$SENSOR_DIR";
$src_file = "$src_dir/tcp.${snifdate}.gz";
$dst_dir = "$ANALYZER_DIR/$subdir";
$unzipped_file = "$dst_dir/tcp.$snifdate";
$zipped_file = $unzipped_file . ".gz";
#
# If our raw file is already on the analyzer, don't re-fetch it.
#
unless ( (-f $zipped_file)) {
#
# Let's see if the file exists on our sensor? Only the first line that
# the remote "ls" prints (stderr included) is inspected.
#
   $rmt_cmd = "$SSH_CMD -l root ${SENSOR} ls $src_file 2>&1";
   open(REMOTE, "$rmt_cmd|")
      or die("Unable to run remote check for $src_file on $SENSOR: $!");
   $result = <REMOTE>;
   close(REMOTE);
   # No output at all leaves $result undefined; that is not a "missing" reply.
   if (defined($result) and $result =~ /No such file/) {
      die("Unable to locate RAW data file on sensor.");
   }
#
# It's not on the analyzer, so fetch it from the sensor. The command is
# logged before it runs.
#
   my $fetch_cmd = "$SCP_CMD -q ${src_prefix}${src_file} $zipped_file";
   print STDOUT ("$fetch_cmd\n");
   system($fetch_cmd) == 0 or
      die("Unable to copy zipped Data file from $SENSOR.");
}
#
#  Call tcpdump to read the newly fetched file, scan it with our bad guys
#  filters, and write the results to the web page.
#
# The filter files are addressed by relative name from here on, so the
# change of directory must succeed; otherwise the glob and the later
# tcpdump -F arguments would refer to whatever directory we started in.
chdir($FILTER_DIR)
   or die("Unable to change to filter directory $FILTER_DIR: $!");
@filters = glob("*.filter");
#
# Add the filter to be used by the find_scan subroutine. It must stay the
# last entry: the last filter is the one piped into find_scan.pl.
#
push @filters, "filter.getall";

print STDOUT "Filter names: @filters\n";

# Output files for this hour and the relative links to the neighbouring
# hours' pages.
$output_txt_file = "$output_dir/$snifdate.txt";
$output_html_file = "$output_dir/$snifdate.html";
$prev_out_file = "$last_hour_dir/$last_hour.html";
$next_out_file = "$next_hour_dir/$next_hour.html";

print STDOUT "zip file = $zipped_file\n";
print STDOUT "output(txt) = $output_txt_file\nprev = $prev_out_file\n";
print STDOUT "output(html) = $output_html_file\nnext = $next_out_file\n";

# Start the hour's HTML page from scratch. The open and the close are
# checked, so an unwritable web directory or a full disk stops the run here.
open(OUTPUT, ">$output_html_file") or
  die "Can't open $output_html_file: $!";
#
# Write out the HTML header information to the html file. The page body is
# left inside an open <PRE> element for the text appended later.
#
print OUTPUT <<"EOF";
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Final//EN">
<HTML>
<HEAD>
<TITLE>Hourly tcpdump for $SITE on $subdir at $hour $TZ.</TITLE>
</HEAD>
<BODY BGCOLOR='FFFFE1'> 
<H3>Site: $SITE - Date: $subdir - $TZ: $hour.</H3>
<PRE>
EOF
close(OUTPUT) or
  die "Can't write $output_html_file: $!";
#
# Create the text output file so each tcpdump iteration appends to it.
#
#
# Run the hour's packets through every filter, but only when no text output
# for this hour exists yet.
#
if (not -e $output_txt_file) {
#
# Create the (empty) text output file that the per-filter results are
# appended to further down.
#
   open(OUT_TEXT, ">$output_txt_file")
      or die "Can't create $output_txt_file: $!";
   close(OUT_TEXT);
#
# Choose a reader for the gzipped data file. With Compress::Zlib installed
# the file is read directly; otherwise an external gunzip is piped in.
# Either way $read_sub->($buffer, $size) fills the caller's buffer and
# returns the byte count, and $close_sub->() closes the source.
#
   if (eval "require Compress::Zlib") {
      Compress::Zlib->import();
      $gz = gzopen($zipped_file, "rb")
         or die "Can't open $zipped_file: $Compress::Zlib::gzerrno\n";
      $read_sub = get_method_ref($gz, 'gzread');
      $close_sub = get_method_ref($gz, 'gzclose');
      $end_of_file = get_method_ref($gz, 'gzerror');
   } else {
      $fh = *ZIP_CMD;
      # Double quotes: the file name has to be interpolated into the command.
      $unzip_cmd = "gunzip -c $zipped_file |";
      open($fh, $unzip_cmd)
         or die "Can't start gunzip for $zipped_file: $!\n";
      # Read straight into $_[0] so the data lands in the caller's buffer,
      # not in a private copy.
      $read_sub = sub { return sysread($fh, $_[0], $_[1]) };
      $close_sub = sub { return(close($fh)) };
      $end_of_file = sub { return eof($fh) };
   }
#
#  Build one output command per filter. Handles are named fh00, fh01, ...
#  (string increment). Ordinary filters write tcpdump text to their own
#  numbered file; the last filter feeds find_scan.pl instead, which gets
#  the numbered file name and the scan threshold as arguments.
#
   $filehandle = "fh00";
   $no_filters = scalar(@filters);
   for ($count = 0; $count < $no_filters; $count++) {
      $fil = $filters[$count];
      $out_handle[$count] = $filehandle++;
      $out_file[$count] = $output_txt_file . "_$count";
      if ($count == $#filters) {
         $out_cmd[$count] = "$TCPDUMP_CMD -t -n -r - -F $fil"
                          . " | sort -u"
                          . " | perl $SHADOW_PATH/find_scan.pl"
                          . " $out_file[$count] $SCAN_THRESHHOLD";
      } else {
         $out_cmd[$count] = "$TCPDUMP_CMD -S -n -r - -F $fil "
                          . " > $out_file[$count]";
      }
   }
   $num_children = $no_filters;
#
# Open all the output commands...
#
   for ($count = 0; $count < $num_children; $count++) {
      print STDOUT ("$out_cmd[$count]\n");
      open ($out_handle[$count], "|$out_cmd[$count]")
         or die "Can't start $out_cmd[$count]: $!\n";
   }
#
# Our output commands are open, read a buffer load from the unzipper, 
# and feed it to each of the output tcpdump commands.
#
# The read result is examined inside the loop: undef is a system error
# (retried when the call was merely interrupted), a negative count is a
# decompression error, and zero is end of data.
#
   $blksize = 16384;
   while (1) {
      $read_len = $read_sub->($buf, $blksize);
      unless (defined $read_len) {
         next if $!{EINTR};
         die "System read error: $!\n";
      }
      die "Error reading $zipped_file\n" if ($read_len < 0);
      last if ($read_len == 0);
      for ($count = 0; $count < $num_children; $count++) {
         $write_len = $read_len;
         $offset = 0;
         while ($write_len ) {          # Handle partial writes.
            $written = syswrite($out_handle[$count], $buf, $write_len, $offset);
            unless (defined $written) {
               next if $!{EINTR};
               die("System write error: $!\n");
            }
            $write_len -= $written;
            $offset += $written;
         }
      }
   }
   $close_sub->();

#
# Concatenate all the output from the "normal" filters onto the 
# $output_txt_file. Closing a handle waits for that tcpdump to finish, so
# its file is complete before it is copied and removed.
#
# The last file is the one created by find_scan.pl. It will be copied into
# the HTML page a bit later; its handle is left open here.
#
   open(OUT_TEXT, ">>$output_txt_file")
      or die "Can't append to $output_txt_file: $!";
   for ($count = 0; $count < ($no_filters - 1); $count++) {
      close($out_handle[$count]);
      if (open(FILTER_OUT, "<$out_file[$count]")) {
         while (<FILTER_OUT>) {
            print OUT_TEXT $_;
         }
         close(FILTER_OUT);
         unlink($out_file[$count]);
      }
   }
   close(OUT_TEXT)
      or die "Can't write $output_txt_file: $!";
   $scan_in_file = $out_file[$#filters];
}

#
# Clean  some tcpdump errors from the file before passing it along.
#
print STDOUT "Cleaning ${output_txt_file}.\n";

# Copy the text file to "<name>.cleaned", leaving out every line that
# contains "gre-proto" or "trunc".
open(CLEAN, ">${output_txt_file}.cleaned") or 
        die "Can't open ${output_txt_file}.cleaned: $!";
open(TEXTFILE, "<${output_txt_file}") or
        die "Can't open ${output_txt_file}: $!";
while (<TEXTFILE>) {
   next if /gre-proto/;
   next if /trunc/;
   print CLEAN $_;
}
close(TEXTFILE);
# Buffered write errors surface at close, so check it.
close(CLEAN) or
        die "Can't write ${output_txt_file}.cleaned: $!";
#
# Put the cleaned copy in place of the original. rename() replaces the
# target in a single step, so the original is never deleted first, and a
# failure is reported instead of leaving no text file behind.
#
rename("${output_txt_file}.cleaned", "${output_txt_file}") or
        die "Can't replace ${output_txt_file}: $!";

print STDOUT "Text file cleaned of tcpdump exceptions.\n";
#
# Call script sort_and_resolve to sort the output file by IP address
# and resolve the DNS names.
#
print STDOUT "Calling sort_and_resolve.pl\n";

# Hand the text file to the external sort_and_resolve.pl script; any
# non-zero status from system() ends the run.
unless (system("perl $SHADOW_PATH/sort_and_resolve.pl $output_txt_file") == 0) {
   die("Unable to sort and resolve $output_txt_file.");
}
#
# Append "<text file>.sorted" line by line to the HTML page. OUTPUT is left
# open for the sections that follow.
#
print STDOUT "Copying ${output_txt_file}.sorted to $output_html_file.\n";

open(OUTPUT, ">>$output_html_file")
  or die "Can't open $output_html_file";
open(TEXTFILE, "<${output_txt_file}.sorted")
  or die "Can't open ${output_txt_file}.sorted";
while (defined($_ = <TEXTFILE>)) {
   print OUTPUT $_;
}
close(TEXTFILE);
#
# Divider line after the sorted text.
#
print OUTPUT "<HR SIZE=3>\n";
#
#
# The find_scan.pl script created a file in the output directory with the
# results of the scan. If it exists, copy it to the tail end of the html
# file.
#
# Close the pipe that feeds the last filter's command (the find_scan.pl
# pipeline) before looking for its result file.
close($out_handle[$#filters]);
print STDOUT "Copying results to html file\n";
if ( -e "$scan_in_file") {
   # Append the scan results to the still-open HTML page, line by line.
   open(RESULTS, "<$scan_in_file")
      or die "Can't open $scan_in_file";
   while (defined($_ = <RESULTS>)) {
      print OUTPUT $_;
   }
   close(RESULTS);
}
# The results file is removed whether or not it was copied.
unlink($scan_in_file);
#
# Divider line after the scan section.
#
print OUTPUT "<HR SIZE=3>\n";
#
#
# Append the date information and navigation bar to the end of the HTML page.
#
# Page footer, written to the still-open OUTPUT handle as one interpolated
# here-document: it ends the <PRE> section, repeats the site/date/hour
# heading, and adds the navigation table whose outer links point at
# $prev_out_file and $next_out_file and whose middle link points at
# /tcpdump_results/index.html.
print OUTPUT <<"EOF";
</PRE>
<H3>Site: $SITE - Date: $subdir - $TZ: $hour.</H3>
<HR>
<TABLE CELLSPACING="0" CELLPADDING="0">
<TR>
<TD ALIGN=CENTER VALIGN=MIDDLE><A HREF="$prev_out_file"><IMG SRC="/images/navbars/2/1.jpg" WIDTH="90" HEIGHT="20" BORDER="0" HSPACE="0" VSPACE="0"></A><IMG SRC="/images/navbars/2/2.jpg" WIDTH="110" HEIGHT="20" HSPACE="0" VSPACE="0"><A HREF="/tcpdump_results/index.html" TARGET="_top"><IMG SRC="/images/navbars/2/3.jpg" WIDTH="50" HEIGHT="20" BORDER="0" HSPACE="0" VSPACE="0"></A><IMG SRC="/images/navbars/2/2.jpg" WIDTH="110" HEIGHT="20" HSPACE="0" VSPACE="0"><A HREF="$next_out_file"><IMG SRC="/images/navbars/2/5.jpg" WIDTH="90" HEIGHT="20" BORDER="0" HSPACE="0" VSPACE="0"></A></TD></TR>
</TABLE>

</BODY>
</HTML>
EOF
# The HTML page is complete at this point.
close(OUTPUT);
#
# Clean up temporary files and exit.
#
# Remove whatever intermediate files are still present: the cleaned text
# file, its sorted copy, and the find_scan.pl result file. The last one is
# held in $scan_in_file, which may be undefined when the filters were not
# run in this invocation.
foreach my $temp_file ($output_txt_file, "${output_txt_file}.sorted",
                       $scan_in_file) {
   unlink($temp_file) if (defined($temp_file) and -e $temp_file);
}
#
# Final log line: completion time and script name.
#
printf STDOUT "%s : %s\n", strftime("%c", localtime(time)), "$0 completed.";
#
