#!/usr/bin/python
#spikeProxyUI.py, a basic web based interface for the SPIKE Proxy

#Version: 1.2
#License: GPL v 2.0
#
#externally, offers 2 classes:
#   wantsRequest, which takes in a header and returns a boolean, yes or no
#   handleRequest, which handles a request as defined by header and body
#that's it. :> Hmm. not quite correct actually

###Known Bugs
#FIXED 1. Some sites use /bob.extension/argument=value&arg2=val2 . . . we don't
#handle this right at the moment. (see getPageH() comment )
#2. The economist.com site fails. They appear to break spec Grr.
#3. locking not implemented in key sections, like the donotlog array, request cache, etc

##############
#Features to do
#
#crawling
#directory finding
#delete tree/page/site
#persistent bugs found page
#some sort of whisker behavior - possibly reading whisker2 files and
#  parsing those

######IMPORTS
#global imports
import os
import dircache
import cPickle
import urllib
import urlparse
import time
#my imports
from requestandresponse import RequestAndResponse
import daveutil
import spkproxy
#END IMPORTS

#module-wide constants

#canned html body handed back when an action is missing or cannot be carried out
notimplementedyet=(
    "<html><head><title>Error</title></head>"
    "<body><h1>Not implemented yet, sorry.</h1></body></html>"
)

#extensions that are never valid html pages; the crawler skips links ending in these
invalidHTMLExtentions=[
    "ppt","pdf","tgz","gz","zip","jpg",
    "gif","png","cab","tar","sig",
]

#these two lists need some serious filling in
#suffixes tried after a known file URL, e.g. www.site.com/file.html.bak
backupSuffixList=[
    "~",
    ".bak",
    ".backup",
]
#stray files requested inside each directory, e.g. www.site.com/directory/ws_ftp.log
fileRotList=[
    "ws_ftp.log",
    ".bash_history",
    ".private",
]

#here we do some wackyness to read in the words file.
def chomp(line):
    """Return line cut off at its first newline and then at its first carriage return."""
    for terminator in ('\n','\r'):
        cut=line.find(terminator)
        if cut!=-1:
            #keep only what precedes the terminator
            line=line[:cut]
    return line

#Load the wordlist used by the directory scan. This runs once, at import
#time, and reads the file named "words" from the current directory.
f=open("words","r")
try:
    #read entire file
    contents=f.readlines()
finally:
    #release the handle even when the read fails
    f.close()
#one entry per line of the file, line terminators stripped
scanDirList = [chomp(line) for line in contents]
print("Loaded wordlist from \"words\"")


#main class
class spkProxyUI:
    
    def __init__(self):
        """Create the on-disk store directory and all in-memory tables of the UI."""
        #plain settings first
        self.stopallactions=0   #crawl and the scans stop once this is 1
        self.dostore=1          #registerRequestandResponse stores nothing when 0
        self.parent=None
        self.triggerhost="spike"

        #the store lives in spikeProxyUI/ below the current directory;
        #if the path doesn't exist, make it exist
        self.basedir=daveutil.pathjoin(os.getcwd(),"spikeProxyUI/")
        daveutil.dmkdir(self.basedir)

        #keyword -> function table used by serveFile, then the fuzz string lists
        self.initkeywords()
        self.setupfuzzstrings()

        #recently stored requests and the log lines shown in the UI
        self.requestCache=[]
        self.requestCacheMaxLength=500
        self.logs=[]
        self.maxlogs=1500

        self.setupTriggers()

        #stores objects we don't want to talk about
        self.nottolog=[]

        self.log("SPIKE UI version 1.2 Started")


    def setParent(self,parent):
        self.parent=parent

    #any sort of trigger based on the header is supported,
    #but we just look at the hostname
    def wantsRequest(self,myheader):
        if myheader.connectHost==self.triggerhost:
            return 1

        if myheader.URLargsDict.has_key("SPIKE_TRIGGER"):
            return 1
        
        return 0

    #handles the request and returns a string
    #as the response
    def handleRequest(self,myheader,mybody):
        if not self.wantsRequest(myheader):
            return "Uh, why did we get this request?"


        extention=myheader.URL.split(".")[-1]
        #print "extention: "+extention
        if extention=="html" and not myheader.URLargsDict.has_key("SPIKE_TRIGGER"):
            #print "html request received with no SPIKE_TRIGGER"
            filename=myheader.URL.replace("http://spike/","")
            #print "filename="+filename
            return self.serveFile(filename)

        urlfile=myheader.URL.split("/")[-1]
        #handle all the wacky file stuff
        file=""
        data=""
        haveheader=0
        
        if myheader.URLargsDict.has_key("file"):
            file = myheader.URLargsDict["file"]

        #print "Handling: urlfile="+urlfile+" file="+file+" SPIKE_TRIGGER="+str(myheader.URLargsDict.has_key("SPIKE_TRIGGER"))
        #print "myheader.URL="+myheader.URL
        
        if urlfile=="getinfo":
            data=self.getinfo(file)
        elif urlfile=="stop":
            self.stopallactions=1
            data="All actions stopped"
        elif urlfile=="allow":
            self.stopallactions=0
            data="All actions allowed"
        elif urlfile=="crawl":
            data=self.crawl(file)
        elif urlfile=="argscan":
            data=self.argscan(file)
        elif urlfile=="dirscan":
            data=self.dirscan(file)
        elif urlfile=="overflow":
            data=self.overflow(file)
        elif urlfile=="displayresponse":
            data=self.displayResponse(file)
            haveheader=1
        #print a form for calling sendrequest
        elif urlfile=="rewrite" and myheader.URLargsDict.has_key("SPIKE_TRIGGER"):
            #print "rewriting"
            data= self.rewrite(file)
        elif urlfile=="sendrequest" and myheader.URLargsDict.has_key("SPIKE_TRIGGER"):
            #send out a new request and return the results
            data=self.sendrequest(myheader,mybody)
            haveheader=1

        #otherwise, let's move on and handle this request
        if myheader.URL=="/":
            filename="index.html"
            #print "filename="+filename
            return self.serveFile(filename)

        
        if (data==""):
            byestring=notimplementedyet
            return "HTTP/1.1 501 Not implemented!\r\nContent-Length: "+str(len(byestring))+"\r\n\r\n"+byestring
        else:
            if not haveheader:
                data=self.addHeader(data)
            return data
    
    #registers the header and body of the request and response in our store
    def registerRequestandResponse(self,clientheader,clientbody,serverheader,serverbody):

        if self.dostore==0:
            return 1
        #basically we organize things as first Sites, then pages,
        #then requests+responses

        #we need to check this in case we are crawling or otherwise don't want to store this
        #page
        if clientheader in  self.nottolog:
            return 1
        
        #do we have this "site" in our store?
        #A site is defined by host,port,isSSL
        site=self.getSiteFromHeader(clientheader)
        
        if not self.haveSiteInStore(site):
            self.createSite(site)

        #need to check for if we have this page or not
        #a page is just: /foo/bar.php or similar
        page=self.getPageH(clientheader)
        if not self.havePageInStore(page):
            self.createPage(page)

        #we don't want to store duplicates
        if self.duplicateRequestandResponse(clientheader,clientbody,serverheader,serverbody):
            #print "Duplicate request and response"
            return 1

        #print "before store: "+str(clientheader)+" Type: "+str(type(clientheader))
        #otherwise, we need to store this request and response off
        result=self.storeRequestandResponse(clientheader,clientbody,serverheader,serverbody)
        return result
    

        
    ###########################################################################
    #End public methods
    ###########################################################################
    def getSiteFromHeader(self,clientheader):
        """Return the site of a header as the list [host, port string, SSL flag string]."""
        host=clientheader.connectHost
        port=str(clientheader.connectPort)
        sslflag=str(clientheader.clientisSSL)
        return [host,port,sslflag]

    #converts a site to a string, just a .join call for now
    def sitestr(self,site):
        #print "Site="+str(site)
        return self.strencode("_".join(site))

    def sitestrh(self,clientheader):
        return self.sitestr(self.getSiteFromHeader(clientheader))
    
    #returns a 1 if we have that site in our store
    def haveSiteInStore(self,site):
        """Return whether a directory for this site exists below self.basedir."""
        sitepath=daveutil.pathjoin(self.basedir,self.sitestr(site))
        return os.path.isdir(sitepath)
                              

    #creates a "site" store on disk
    def createSite(self,site):
        """Create the directory for a site below self.basedir and return 1.

        os.mkdir raises when the directory cannot be created, for example
        because it already exists.
        """
        #daveutil.pathjoin takes care of the path separator (win32)
        sitepath=daveutil.pathjoin(self.basedir,self.sitestr(site))
        os.mkdir(sitepath)
        return 1
        

    #returns the entire directory structure for a given page
    def pagestrh(self,clientheader):
        """Return the site string joined with the page path of a client header."""
        #This used to call self.sitrstrh, a misspelling of sitestrh that raised
        #AttributeError on every call.
        #NOTE(review): getPageH() already begins with the site string, so the
        #site appears twice in the result -- confirm that is what callers want.
        return daveutil.pathjoin(self.sitestrh(clientheader),self.getPageH(clientheader))

    #rips off the arguments and stuff to yield a nice /bob/dave.php
    #takes in a clientheader, not a string!
    #TODO: this fails currently for urls with a asdf.ng/bob=asdf&asdf=asdf
    #syntax
    def getPageH(self,clientheader):
        """Return the store path of a header's page: site string joined with its URL.

        URLs ending in "/" get a trailing "_directory_" component. An empty
        URL is replaced by "/" on the header itself before it is used.
        """
        site=self.sitestrh(clientheader)
        #dunno why this happens, but it does - techinsurance.com does it
        if clientheader.URL=="":
            clientheader.URL="/"
        url=clientheader.URL
        if url[-1]!="/":
            return daveutil.pathjoin(site,url)
        #directory URL: give it a component of its own inside the store
        return daveutil.pathjoin(site,url,"_directory_")

    #strip off the following dave.php and leave /bob/
    def getDir(self,page):
        """Return "/" followed by the directory part of page."""
        parent=os.path.dirname(page)
        return "/"+parent

    #returns boolean value for whether we've seen this page before
    def havePageInStore(self,page):
        """Return whether the directory for page already exists below self.basedir."""
        parentdir=self.getDir(page)
        #last path component of the page, i.e. the second half of os.path.split
        leaf=os.path.basename(page)
        pagepath=daveutil.pathjoin(self.basedir,parentdir,leaf)
        return os.path.isdir(pagepath)

    #creates a directory for our page. It looks like this: ./sitebase/bob/dave.php/
    def createPage(self,page):
        """Create the directory for page (./sitebase/bob/dave.php/) and return 1."""
        #used to do some crazy stuff here, but it's really quite simple
        target=daveutil.pathjoin(self.basedir,page)
        daveutil.dmkdir(target)
        return 1

    #returns 1 if it's a request and response we've seen before, otherwise 0
    def duplicateRequestandResponse(self,clientheader,clientbody,serverheader,serverbody):
        """Return 1 if this request and response is already stored, otherwise 0.

        Stored files are named <hash>_<something>, so the check compares the
        hash of the new pair with the part of every file name in the page's
        directory that precedes the first "_".
        """
        #the page directory is guaranteed to exist: the caller creates it first
        pagedir=daveutil.pathjoin(self.basedir,self.getPageH(clientheader))
        filelist=dircache.listdir(pagedir)

        #order N operation here...we iterate over data
        newhash=daveutil.genhash(clientheader,clientbody,serverheader,serverbody)

        #we just compare hashes now
        for afile in filelist:
            #ignore directories. The listing holds bare names, so each name has
            #to be joined with pagedir; testing the bare name would look in the
            #current working directory instead.
            if os.path.isdir(daveutil.pathjoin(pagedir,afile)):
                continue
            oldhash=afile.split("_")[0]
            if oldhash==newhash:
                return 1

        return 0

    def strencode(self,astring):
        """Return astring encoded by daveutil.strencode."""
        encoded=daveutil.strencode(astring)
        return encoded
                          

        #stores a request and response into our file structure for later retrival
    def storeRequestandResponse(self, clientheader,clientbody,serverheader,serverbody):
        """Pickle a request and response into the page's directory for later retrieval.

        The pair is skipped when the response sets off no trigger and the
        client header is on the do-not-log list. A response that sets off a
        trigger is logged as a warning and always stored. The stored file is
        also added to the request cache. Returns 1 in every case.
        """
        pagedir=daveutil.pathjoin(self.basedir,self.getPageH(clientheader))
        digest=daveutil.genhash(clientheader,clientbody,serverheader,serverbody)
        #the file name is <hash>_<encoded host>; the duplicate check relies on
        #the hash being the part before the first "_"
        filename=daveutil.pathjoin(pagedir,digest+"_"+self.strencode(clientheader.connectHost))

        #here we check for any ODBC strings or whatnot, and if we don't
        #see one of those, and we are in the "don't store" list, we just
        #return
        triginfo=self.scanForTriggers(serverheader,serverbody)
        if triginfo=="" and clientheader in self.nottolog:
            return 1

        #print out the warning
        if triginfo!="":
            self.log("Warning: "+filename+" triggered "+triginfo)

        obj=RequestAndResponse(clientheader,clientbody,serverheader,serverbody)

        #NOTE(review): a binary pickle is written through a text-mode file.
        #The readers in this file open with "r" as well, so the mode has to
        #be changed on both sides together or not at all.
        openfile=open(filename,"w")
        try:
            binary=1
            cPickle.dump(obj,openfile,binary)
        finally:
            #close the file even when pickling fails
            openfile.close()

        #ok, now we need to store it in our bucket of things we've just done for the request cache
        self.saveInRequestCache(filename)
        return 1

    #serve a file, replacing keywords with something appropriate
    #used for static html files. not spike cgi requests
    def serveFile(self,filename):
        """Return a static file, keywords expanded, wrapped by addHeader.

        filename is looked up relative to self.basedir with "spikeProxyUI/"
        removed. When no such file exists the body is an error message naming
        the path. Every key of self.keywords that occurs in the body is
        replaced by the result of runkeyword for it.
        """
        #strip this last bit off
        mybase=self.basedir.replace("spikeProxyUI/","")
        realfilename=daveutil.pathjoin(mybase,filename)
        if not os.path.isfile(realfilename):
            data="Error in Spike Proxy UI - No file found: "+realfilename
        else:
            page=open(realfilename,"r")
            data=page.read()
            page.close()

        for word in self.keywords.keys():
            #only run a keyword's function when the keyword is really there
            if data.find(word)!=-1:
                data=data.replace(word,self.runkeyword(word))

        #addHeader returns both the header and the data
        response=""
        response+=self.addHeader(data)
        return response

    #init function to set up our function list for keywords in our responses
    def initkeywords(self):
        self.keywords={}
        self.keywords["***SITES***"]=self.getSites
        self.keywords["***requestcache***"]=self.printRequestCache
        self.keywords["***LOGS***"]=self.printLogs
        return

    #called whenever we find a keyword in our response - used to replace
    #things in html files we respond with
    #returns an html string
    def runkeyword(self,word):
        #print "Running keyword: "+word
        if word not in self.keywords:
            return "Some sort of keyword error: keyword "+word+" not found."

        return self.keywords[word]()
        

    #returns an html string of all the sites we have seen
    def getSites(self):
        """Return the html listing (see htmlDirectory) of the top of the store."""
        return self.htmlDirectory(daveutil.pathjoin(self.basedir))

    def printRequestCache(self):
        result="<ul>"
        for file in self.requestCache:
            display=file.split("/")[-1]
            link=file.replace(self.basedir,"")
            start="Request: "
            result+="<li> "+ start + display + " <br>  " +  self.getOptions(file,1)+"</li>"
        return result

    def printLogs(self):
        result=""
        for log in self.logs:
            result+="Log: "+log+"<br>"
        return result

    #takes in a directory name and returns an html string representing all the fun you can
    #have with it!
    def htmlDirectory(self,dir):
        """Return an html list of the entries of dir, each followed by its option links.

        Sub-directories are labelled "Directory: "; everything else is taken
        to be a stored request and labelled "Request: ".
        """
        pieces=["<ul>"]
        for entry in os.listdir(dir):
            entrypath=daveutil.pathjoin(dir,entry)
            if os.path.isdir(entrypath):
                isrequest=0
                label="Directory: "
            else:
                isrequest=1
                label="Request: "
            pieces.append("<li> "+label+entry+" <br>  "+self.getOptions(entrypath,isrequest)+"</li>")
        pieces.append("</ul>")
        return "".join(pieces)

    #returns and html string for the options the user can click on to do
    #things like "crawl" "resend with modifications" etc
    #must change site around for rewrite request
    def getOptions(self,dir,isrequest):
        realdir=dir.replace(self.basedir,"")

        #print "getOptions realdir="+realdir
        site=realdir.split("/")[0]
        sitename=site.split("_")[0]
        siteport=site.split("_")[1]
        sitessl=site.split("_")[2]=="1"
        if sitessl:
            site="https://"+sitename
        else:
            site="http://"+sitename
        site+=":"+siteport+"/"
        site+="/".join(realdir.split("/")[1:-1])
        site=site.replace("/_directory_","")

        if isrequest==1:
            getinfo="Print Request Info"
        else:
            getinfo="Delve into Dir"

        
        #print "getOptions site: "+site
        result= "<a href=\"/getinfo?file="+realdir+"\">   "+getinfo+",</a>   "
        if isrequest:
            result+="<a href=\""+site+"/rewrite?SPIKE_TRIGGER=yes&file="+realdir+"\">   rewrite request,</a>   "
            result+="<a href=\"/displayresponse?file="+realdir+"\">   Display Response,</a>    "
        if isrequest:
            result+="<a href=\"/crawl?file="+realdir+"\">     crawl,</a>    "
        result+="<a href=\"/argscan?file="+realdir+"\">   argscan,</a>   "
        result+="<a href=\"/dirscan?file="+realdir+"\">   dirscan,</a>   "
        result+="<a href=\"/overflow?file="+realdir+"\">  overflow</a>   "
        return result

    #rewrite a request and resend it
    def rewrite(self,file):
        """Return the request form for a stored request so it can be edited and resent.

        file is relative to self.basedir. When it is not a regular file a
        message is printed and the notimplementedyet page is returned.
        """
        realfile=daveutil.pathjoin(self.basedir,file)
        if not os.path.isfile(realfile):
            print("Could not rewrite file: "+realfile)
            return notimplementedyet
        return self.displayRequestForm(realfile)
    
    def crawl(self,file):
        """Crawl the site of a stored request, starting at that request's URL.

        file -- path of a pickled request/response, relative to self.basedir

        The stored client header and body are reused for every request; only
        the URL is changed and the conditional-GET headers are removed.
        Links pointing at another site are ignored.

        Returns an html page listing every URL that was requested, the string
        "stopped" when self.stopallactions is found set during the crawl, or
        the notimplementedyet page when file is not a regular file.
        """
        #set this to 1 to enable debug printfs
        debug_crawl=0
        
        #ok, I have a file I want to crawl
        #I need to open this file, get the IP and virtualhost we are
        #crawling and the start URL and any cookies and whatnot
        #crawlURLList: URLs still to request; doneURLList: URLs already requested
        crawlURLList=[]
        doneURLList=[]
        realfile=daveutil.pathjoin(self.basedir,file)
        if not os.path.isfile(realfile):
            self.log( "!!!Some sort of error trying to crawl "+file)
            return notimplementedyet

        infile=open(realfile,"r")
        obj=cPickle.load(infile)
        infile.close()

        #the value of response is not used anywhere below
        response=obj.getResponse()
        myhdr=obj.clientheader
        mybdy=obj.clientbody
        startURLs=[myhdr.URL]
        #startURLs always holds exactly one entry here, so this never fires
        if startURLs==[]:
            self.log( "No URLS found to crawl from "+file)
        for o in startURLs:
            crawlURLList.append(o)

        #site is now https://www.cnn.com/ or similar
        site=myhdr.getSite()

        self.log("Starting crawl on site %s with URLs %s" % (site,str(startURLs)))
        #we're not done yet. :>
        #each pass of the while loop walks the whole list; the crawl ends
        #after a pass in which no URL had to be requested
        done=0
        while not done:

            if self.stopallactions==1:
                return "stopped"
            
            
            #we set this and unset it if we find any we need to do
            done=1
            #crawlURLList grows while this loop walks it, so URLs appended
            #further down are visited later in this same pass
            for url in crawlURLList:
                if debug_crawl:
                    print "crawl: URLList: %s" % url
                if url not in doneURLList:
                    doneURLList.append(url)
                    if debug_crawl:
                        print "Setting done to 0"
                    done=0
                    #the stored header is reused; only its URL changes
                    myhdr.URL=url
                    #if you don't remove these, then you sometimes get 304s which are not useful for crawling
                    myhdr.removeHeaders("If-Modified-Since")
                    myhdr.removeHeaders("If-None-Match")
                    self.log("Crawling URL: "+url)
                    newpage=self.makeRequest(myhdr,mybdy)
                    newURLS=daveutil.collectURLSFromPage(newpage)
                    if debug_crawl:
                        self.log("Collected %d URLS from Page of length %d" % (len(newURLS),len(newpage)))
                    #print "New URLS are: "+str(newURLS)
                    for newurl in newURLS:
                        if debug_crawl:
                            print "crawl: newurl=%s" % newurl

                            
                        #no scheme at all: resolve the link against the page it came from
                        if newurl[:4] != "http" and newurl.find("://")==-1:
                            if self.stopallactions==1:
                                return "stopped"
                            
                            if debug_crawl:
                                print "Found non-absolute URL %s" % newurl
                                
                            newurl=urlparse.urljoin(url,newurl)
                            
                        if debug_crawl:
                            print "Doing: "+newurl


                        #get extention and make sure we want to parse it
                        if newurl.split(".")[-1] in invalidHTMLExtentions:
                            self.log("Skipping non HTML page: "+newurl)
                            #continue with for loop
                            continue
                            
                        if debug_crawl:
                            print "newurl header: _%s_" % newurl[:7]

                        if newurl[:7] == "http://" or newurl[:8]=="https://":
                            if debug_crawl:
                                print "crawl: Absolute URL found " + newurl
                            #scheme://host[:port], i.e. the first three "/"-separated pieces
                            #this doesn't have a trailing /
                            newsite="/".join(newurl.split("/")[:3])

                            #rip the site off of the newurl
                            newerurl="/".join(newurl.split("/")[3:])
                            #why would this happen? Well, we're covered if it does.
                            if newerurl=="":
                                newerurl="/"
                            if newerurl[0]!="/":
                                newerurl="/"+newerurl
                            
                            #make the path end in "/" exactly when the full URL did
                            if newurl[-1]=="/" and newerurl[-1]!="/":
                                newerurl+="/"
                            if newurl[-1]!="/" and newerurl[-1]=="/":
                                newerurl=newerurl[:-1]
                            newurl=newerurl

                            #only links on the site being crawled are followed
                            if newsite != site:
                                if debug_crawl:
                                    self.log("crawl: Ignoring url on non-crawled site:"+newsite)
                                
                            else:
                                if newurl not in doneURLList:
                                    if debug_crawl:
                                        print "New URL Found: "+newurl
                                    crawlURLList.append(newurl)
                        else:
                            #not an absolute URL
                            if newurl[:7]=="mailto:":
                                if debug_crawl:
                                    self.log( "Ignoring mailto URL:"+newurl)
                                continue
                            if debug_crawl:
                                print "non-absolute URL found:"+newurl
                            newurl=urlparse.urljoin(url,newurl)
                            if newurl[0]!="/":
                                newurl="/"+newurl
                            if newurl not in doneURLList:
                                if debug_crawl:
                                    print "crawl: New URL Found: "+newurl
                                crawlURLList.append(newurl)
        
        self.log("Done with crawl!");
        
        #report every URL that was requested
        header="<html><body>"
        footer="</body></html>"
        middle="Crawled Urls:<P>"
        for url in doneURLList:
            middle+="<li>"+url+"</li>"
        return header+middle+footer

    #gives the client the response.
    def displayResponse(self,file):
        """Return the stored response of a pickled request/response.

        file is relative to self.basedir. open() raises when the file does
        not exist or cannot be read.
        """
        realfile=daveutil.pathjoin(self.basedir,file)

        #NOTE(security): file comes straight from the request's "file"
        #argument, and unpickling a file can run code; only point this at
        #files the store wrote itself.
        infile=open(realfile,"r")
        try:
            obj=cPickle.load(infile)
        finally:
            #close the file even when unpickling fails
            infile.close()

        #the concatenation keeps the original requirement that the response is a string
        result=""
        result+=obj.getResponse()
        return result
        

    def getinfo(self,file):
        """Return html for a store entry: a listing for a directory, a dump for a request."""
        realfile=daveutil.pathjoin(self.basedir,file)
        if not os.path.isdir(realfile):
            #it's a file so we need to display it in text
            return self.printRequestFile(realfile)
        return self.htmlDirectory(realfile)

    def printRequestFile(self,realfile):
        """Unpickle the request/response stored in realfile and return its printme() output."""
        infile=open(realfile,"r")
        try:
            obj=cPickle.load(infile)
        finally:
            #close the file even when unpickling fails
            infile.close()
        return obj.printme()


    #TODO: Move this stuff into separate files with readline
    #so you can just vi a new string into existance
    def setupfuzzstrings(self):

        self.odbcscanfuzzstrings=[]
        self.overflowfuzzstrings=[]

        i=1
        fuzzcharacters=["A","1","\"",".","<","%","%n"]
        while i<1000000:
            #a small selection of fuzz characters
            for fchar in fuzzcharacters:
                self.overflowfuzzstrings.append(fchar*i)
            i=i*2

        self.odbcscanfuzzstrings.append("../../../../../../../../../../../../etc/hosts%00")
        self.odbcscanfuzzstrings.append("../../../../../../../../../../../../etc/hosts")
        self.odbcscanfuzzstrings.append("../../../../../../../../../../../../etc/passwd%00")
        self.odbcscanfuzzstrings.append("../../../../../../../../../../../../etc/passwd")
        self.odbcscanfuzzstrings.append("../../../../../../../../../../../../etc/shadow%00")
        self.odbcscanfuzzstrings.append("../../../../../../../../../../../../etc/shadow")
        self.odbcscanfuzzstrings.append("../../../../../../../../../../../../boot.ini%00")
        self.odbcscanfuzzstrings.append("../../../../../../../../../../../../boot.ini")
        self.odbcscanfuzzstrings.append("../../../../../../../../../../../../localstart.asp%00")
        self.odbcscanfuzzstrings.append("../../../../../../../../../../../../localstart.asp")
        self.odbcscanfuzzstrings.append("%25%5c..%25%5c..%25%5c..%25%5c..%25%5c..%25%5c..%25%5c..%25%5c..%25%5c..%25%5c..%25%5c..%25%5c..%25%5c..%25%5c..%00")
        self.odbcscanfuzzstrings.append("%25%5c..%25%5c..%25%5c..%25%5c..%25%5c..%25%5c..%25%5c..%25%5c..%25%5c..%25%5c..%25%5c..%25%5c..%25%5c..%25%5c..%255cboot.ini")
        self.odbcscanfuzzstrings.append("/%25%5c..%25%5c..%25%5c..%25%5c..%25%5c..%25%5c..%25%5c..%25%5c..%25%5c..%25%5c..%25%5c..%25%5c..%25%5c..%25%5c..%00")
        self.odbcscanfuzzstrings.append("/%25%5c..%25%5c..%25%5c..%25%5c..%25%5c..%25%5c..%25%5c..%25%5c..%25%5c..%25%5c..%25%5c..%25%5c..%25%5c..%25%5c..winnt/desktop.ini")
        self.odbcscanfuzzstrings.append("65536")
        self.odbcscanfuzzstrings.append("0xfffffff")
        self.odbcscanfuzzstrings.append("268435455")
        self.odbcscanfuzzstrings.append("1")
        self.odbcscanfuzzstrings.append("0")
        self.odbcscanfuzzstrings.append("-1")
        self.odbcscanfuzzstrings.append("-268435455")
        self.odbcscanfuzzstrings.append("-20")
        self.odbcscanfuzzstrings.append("1;SELECT%20*")
        self.odbcscanfuzzstrings.append("'sqlattempt1")
        self.odbcscanfuzzstrings.append("(sqlattempt2)")
        self.odbcscanfuzzstrings.append("OR%201=1")
        self.odbcscanfuzzstrings.append(";read;")
        self.odbcscanfuzzstrings.append(";netstat -a;")
        #causes all sorts of problems
        self.odbcscanfuzzstrings.append("\nnetstat -a%\n")
        self.odbcscanfuzzstrings.append("\"hihihi")
        
        self.odbcscanfuzzstrings.append("|dir")
        
        self.odbcscanfuzzstrings.append("|ls")
        self.odbcscanfuzzstrings.append("+%00")
        self.odbcscanfuzzstrings.append("%20$(sleep%2050)")
        self.odbcscanfuzzstrings.append("%20'sleep%2050'")
        self.odbcscanfuzzstrings.append("!@#$%%^#$%#$@#$%$$@#$%^^**(()")
        self.odbcscanfuzzstrings.append("%01%02%03%04%0a%0d%0aADSF")
        self.odbcscanfuzzstrings.append("Bob's%20Foot")



    #look for /backup/ /admin/ etc
    #TODO: This is completely bogus currently
    def dirscan(self,file):
        self.log("Starting directory scan on "+file)
        self.scannedForDirs={}
        self.scannedForFiles={}
        
        result= self.dofilescan(file)
        self.log("Done with directory scan on "+file)
        return result
    
    def argscan(self,file):
        self.log("Starting argument fuzz on "+file)
        return self.doargsfuzz(file,self.odbcscanfuzzstrings)

    def overflow(self,file):
        self.log("Starting overflow fuzz on "+file)
        return self.doargsfuzz(file,self.overflowfuzzstrings)

    def lookforfiles(self,ch,cb):
        """Request the URL of ch with every backup suffix appended and log the hits.

        ch -- client header of the stored request; its URL is overwritten
        cb -- client body sent along with each request

        A URL already present in self.scannedForFiles is not scanned again.
        ch.URL is left at the last URL that was tried.
        """
        #copy this off
        baseurl=ch.URL[:]
        if baseurl in self.scannedForFiles:
            return "Already scanned directory "+baseurl
        #save us off so we don't scan us again
        self.scannedForFiles[baseurl]=""

        for suffix in backupSuffixList:
            candidate=baseurl+suffix
            ch.URL=candidate
            self.dontLog(ch)
            result=self.makeRequest(ch,cb)
            #a "404" counts as a miss unless "403.6" shows up as well
            notfound=result.count("404")!=0 and result.count("403.6")==0
            #if we didn't see a "did not exist," then we found gold!
            if not notfound and result.count("No such list <em>")==0:
                self.log("Found file! *"+candidate+"*")
            self.removeDontLog(ch)

        return "Done with scanning a file!"

    #ch is header of request we are going to scan for directories
    #such as /admin/ etc
    #also looks for file turds such as ws_ftp.log
    def lookfordirs(self,ch,cb):
        """Probe the directory URL of ch for well-known sub-directories and stray files.

        ch -- client header whose URL is the directory to scan; it is overwritten
        cb -- client body sent along with each request

        Every word of scanDirList is tried as a sub-directory (/admin/ etc),
        then every name of fileRotList as a file (ws_ftp.log etc). Hits are
        logged. A URL already present in self.scannedForDirs is not scanned
        again, and the scan stops early once self.stopallactions is set.
        """
        #copy this off
        oldheaderurl=ch.URL[:]
        if self.scannedForDirs.has_key(oldheaderurl):
            return "Already scanned directory "+oldheaderurl
        #save us off so we don't scan us again
        self.scannedForDirs[oldheaderurl]=""

        for dir in scanDirList:
            #a blank line in the wordlist would only probe the directory
            #itself with a doubled slash and report it as found
            if dir=="":
                continue
            newurl=oldheaderurl+dir+"/"
            ch.URL=newurl
            self.dontLog(ch)
            result=self.makeRequest(ch,cb)
            #No such list is the error message mailman gives...this reduces false positives
            if (result.count("404")==0 or result.count("403.6")!=0) and result.count("No such list <em>")==0:
                self.log("Found directory! *"+newurl+"*")
            self.removeDontLog(ch)
            if self.stopallactions:
                return "Stopped."

        #The directory loop above appends straight to the URL, so directory
        #URLs normally end in "/". Only add a separator when it is missing;
        #adding one unconditionally produced requests for "dir//file".
        if oldheaderurl[-1:]=="/":
            prefix=oldheaderurl
        else:
            prefix=oldheaderurl+"/"

        for file in fileRotList:
            newurl=prefix+file
            ch.URL=newurl
            self.dontLog(ch)
            result=self.makeRequest(ch,cb)
            if (result.count("404")==0 or result.count("403.6")!=0) and result.count("No such list <em>")==0:
                self.log("Found file! *"+newurl+"*")
            self.removeDontLog(ch)
            if self.stopallactions:
                return "Stopped."

        return "Done with scanning a directory!"


    def dofilescan(self,file):
        """Scan one store entry, or everything below a store directory.

        file is relative to self.basedir. For a directory every entry is
        scanned recursively. For a stored request the backup-file scan is run
        on its URL (unless it is a directory request) and the directory scan
        on the directory that contains it.

        Returns a status string; "Stopped." when self.stopallactions is set.
        """
        if self.stopallactions:
            return "Stopped."

        realfile=daveutil.pathjoin(self.basedir,file)

        #here we delve into directories transparently!
        if os.path.isdir(realfile):
            for newfile in os.listdir(realfile):
                self.dofilescan(daveutil.pathjoin(file,newfile))
            return "Done scanning for files and directories!"

        #read in file and unpickle it into a request object
        infile=open(realfile,"r")
        try:
            obj=cPickle.load(infile)
        finally:
            #close the file even when unpickling fails
            infile.close()
        #save these off in shorter form
        ch=obj.clientheader
        cb=obj.clientbody

        if realfile.count("_directory_")>0:
            self.lookfordirs(ch,cb)
        else:
            self.lookforfiles(ch,cb)
            #rip off the filename. lookforfiles may have left a backup suffix
            #on ch.URL, but the suffixes hold no "/", so the directory part
            #is unaffected.
            middle="/".join(ch.URL.split("/")[1:-1])
            if middle=="":
                #page directly below the root: the directory is "/", not "//"
                base="/"
            else:
                base="/"+middle+"/"
            ch.URL=base
            self.lookfordirs(ch,cb)

        return "Completed scanning for files and directories."

    #changes each value in the file's request to a fuzzstring
    #and sends the requests
    def doargsfuzz(self,file,fuzzstringsset):
        """Fuzz every argument of the saved request(s) under file.

        file is a path relative to self.basedir; a directory is fuzzed
        recursively. fuzzstringsset is an iterable of fuzz strings.

        Each URL argument is replaced, one at a time, by the URL-quoted fuzz
        string. Each body argument is tried twice per fuzz string: replaced
        by it, and with it appended to the original value. The responses are
        ignored here.

        Returns a status string; "Stopped." as soon as self.stopallactions
        is found to be set.
        """
        if self.stopallactions:
            return "Stopped."

        realfile=daveutil.pathjoin(self.basedir,file)

        #here we handle directories transparently!
        if os.path.isdir(realfile):
            for newfile in os.listdir(realfile):
                self.doargsfuzz(daveutil.pathjoin(file,newfile),fuzzstringsset)
                #give up on the rest of the tree once a stop was requested
                if self.stopallactions:
                    return "Stopped."
            return "Done with fuzzing a directory!"

        #NOTE: unpickling can execute code embedded in the file, so only
        #files written by the proxy itself should ever live under basedir
        infile=open(realfile,"r")
        try:
            obj=cPickle.load(infile)
        finally:
            #close the handle even when the file is not a valid pickle
            infile.close()
        #save these off in shorter form
        ch=obj.clientheader
        cb=obj.clientbody

        if self.stopallactions:
            return "Stopped."

        #URL Arguments (GET requests)
        for key in ch.URLargsDict.keys():
            oldvalue=ch.URLargsDict[key]
            for fuzzstring in fuzzstringsset:
                if self.stopallactions:
                    return "Stopped."
                ch.URLargsDict[key]=urllib.quote_plus(fuzzstring)
                self._sendFuzzRequest(ch,cb)
            #put the real value back before fuzzing the next argument
            ch.URLargsDict[key]=oldvalue

        #BODY arguments (POSTS)
        #copy off old body data so it can be restored afterwards
        oldbodydata=cb.data[:]
        bodyargs=daveutil.splitargs("".join(cb.data))
        if bodyargs is not None:
            for akey in bodyargs.keys():
                oldvalue=bodyargs[akey]
                for fuzzstring in fuzzstringsset:
                    if self.stopallactions:
                        return "Stopped."
                    quoted=urllib.quote_plus(fuzzstring)
                    #first just the fuzzstring, then the fuzzstring added to
                    #the oldvalue (works on MS Content Management Server!)
                    for candidate in (quoted,oldvalue+quoted):
                        bodyargs[akey]=candidate
                        cb.data=daveutil.joinargs(bodyargs)
                        self._sendFuzzRequest(ch,cb)
                bodyargs[akey]=oldvalue
        cb.data=oldbodydata
        return "Done with fuzzing a file!"

    def _sendFuzzRequest(self,ch,cb):
        #sends one fuzzed request while ch is on the do-not-log list; the
        #finally makes sure ch comes off the list even if the request fails
        self.dontLog(ch)
        try:
            return self.makeRequest(ch,cb)
        finally:
            self.removeDontLog(ch)

    
################

    def dontLog(self,ch):
        self.nottolog+=[ch]
        #print "self.nottolog="+str(self.nottolog)
        
    def removeDontLog(self,ch):
        self.nottolog.remove(ch)
        
    #loads a Request and displays it as a form
    def displayRequestForm(self,file):
        """Load the pickled request stored in file and render it as an HTML form.

        The form is submitted with GET to
        http(s)://<connectHost>:<connectPort><URL>/sendrequest and carries a
        hidden SPIKE_TRIGGER field, the verb, host, port, URL and SSL flag,
        and numbered Header<n>, URLArg<n> and BodyArg<n> rows, each section
        followed by five empty rows for additions.

        Returns the form markup as a string.
        """
        #NOTE: unpickling can execute code embedded in the file, so only
        #files written by the proxy itself should be passed in here
        infile=open(file,"r")
        try:
            obj=cPickle.load(infile)
        finally:
            #close the handle even when the file is not a valid pickle
            infile.close()
        ch=obj.clientheader
        cb=obj.clientbody

        #must use the real URL so that loading href "/bob.something" works as if from that server
        our_url=ch.URL+"/sendrequest"
        #strip off leading double slashes
        if our_url[:2]=="//":
            our_url=our_url[1:]
        site="http"
        if ch.clientisSSL:
            site+="s"
        site+="://"+ch.connectHost+":"+str(ch.connectPort)

        #NOTE(review): the action URL is put into the markup unescaped, and
        #whether the daveutil helpers escape their values is not visible
        #from here - confirm before showing hostile requests in a browser
        parts=["<FORM action=\""+site+our_url+"\" method=\"GET\">\n"]
        parts.append(daveutil.printHiddenEntry("SPIKE_TRIGGER","yes"))
        parts.append(daveutil.printFormEntry("Verb", ch.verb))
        parts.append(daveutil.printFormEntry("ConnectHost",ch.connectHost))
        parts.append(daveutil.printFormEntry("ConnectPort",str(ch.connectPort)))
        parts.append(daveutil.printFormEntry("URL",ch.URL))
        parts.append(daveutil.printFormCheckbox("SSL",ch.clientisSSL))

        #all the headers; a header with several values gets one row per value
        parts.append("<P><h1>Headers</H1><P>")
        headerpairs=[]
        for hkey in ch.headerValuesDict.keys():
            for val in ch.headerValuesDict[hkey]:
                headerpairs.append((hkey,val))
        parts.append(self._formRows("Header",headerpairs))

        parts.append("<P><h1>URL Args</H1>")
        urlpairs=[(akey,ch.URLargsDict[akey]) for akey in ch.URLargsDict.keys()]
        parts.append(self._formRows("URLArg",urlpairs))

        #now the body arguments
        parts.append("<P><h1>Body Args</h1></p>")
        bodypairs=[]
        if len(cb.data)>0:
            bodyargs=daveutil.splitargs("".join(cb.data))
            if bodyargs is not None:
                bodypairs=[(akey,bodyargs[akey]) for akey in bodyargs.keys()]
        #numbering the rows here is what keeps the body arguments apart;
        #they all used to be emitted under the same name BodyArg0
        parts.append(self._formRows("BodyArg",bodypairs))

        parts.append("<input type=\"submit\" >")
        parts.append("<input type=\"reset\">")
        parts.append("</form>")

        return "".join(parts)

    def _formRows(self,prefix,pairs):
        #renders one row per (name,value) pair, named prefix0, prefix1, ...
        #followed by five empty rows the user can fill in
        rows=[]
        index=0
        for name,value in pairs:
            rows.append(daveutil.printFormEntryAndValue(prefix+str(index),name,value))
            index=index+1
        for blank in range(index,index+5):
            rows.append(daveutil.printFormEntryAndValue(prefix+str(blank),"",""))
        return "".join(rows)
    

    #just a little default header thing
    def addHeader(self,data):
        """Wrap data in a minimal "200 OK" text/html response and return it.

        Content-Length is set to len(data); data itself follows the blank
        line that ends the header.
        """
        headerlines=(
            "HTTP/1.1 200 OK",
            "Server: SPIKE Proxy 1.1",
            "Content-Type: text/html",
            "Content-Length: "+str(len(data)),
        )
        #every header line ends in CRLF, plus one empty line before the body
        return "\r\n".join(headerlines)+"\r\n\r\n"+data


    #supports rewrite!
    #sends the actual request to the remote server!
    def sendrequest(self,myheader,mybody):
        """Rebuild a request from a submitted request form and send it.

        myheader.URLargsDict holds the submitted form fields: URL, Verb,
        ConnectHost, ConnectPort, the optional SSL checkbox, and pairs of
        fields whose keys end in "N" (name) and "V" (value) and contain
        "Header", "URLArg" or "Body". mybody is not used.

        Pairs with an empty or missing name are skipped. Returns whatever
        makeRequest() returns for the rebuilt header and body.
        """
        args=myheader.URLargsDict
        #new header and body to fill up
        newh=spkproxy.header()
        newb=spkproxy.body()

        newh.URL=urllib.unquote_plus(args["URL"])
        newh.verb=urllib.unquote_plus(args["Verb"])
        newh.connectHost=urllib.unquote_plus(args["ConnectHost"])
        #passed on exactly as submitted (a string)
        newh.connectPort=args["ConnectPort"]
        newh.version="HTTP/1.1"
        #checkbox, only exists if it is checked
        newh.clientisSSL="SSL" in args

        #fields that are already dealt with
        did=["SPIKE_TRIGGER","URL","Verb","ConnectHost","ConnectPort","SSL"]
        firstbodyarg=1
        for akey in args.keys():
            #filter the ones we already did
            if akey in did:
                continue
            if akey.count("Header"):
                kind="header"
            elif akey.count("URLArg"):
                kind="urlarg"
            elif akey.count("Body"):
                kind="body"
            else:
                continue

            namekey,valuekey=self._fieldPair(akey)
            #add both halves to the did list so we don't do the pair again
            did.append(valuekey)
            did.append(namekey)
            #a half that was not submitted counts as empty instead of
            #blowing up with a KeyError; names are form data just like the
            #values, so both get decoded
            name=urllib.unquote_plus(args.get(namekey,""))
            value=urllib.unquote_plus(args.get(valuekey,""))
            if name=="":
                continue

            if kind=="header":
                if name not in newh.headerValuesDict:
                    newh.headerValuesDict[name]=[]
                newh.headerValuesDict[name].append(value)
            elif kind=="urlarg":
                newh.URLargsDict[name]=value
            else:
                if not firstbodyarg:
                    newb.data.append("&")
                newb.data.append(name+"="+value)
                firstbodyarg=0

        #ok, so now we have a new header and body (newh, newb)
        return self.makeRequest(newh,newb)

    def _fieldPair(self,akey):
        #returns (namekey,valuekey) for one half of an N/V form field pair:
        #a key ending in "N" is the name half, anything else the value half
        if akey[-1]=="N":
            return akey,akey[:-1]+"V"
        return akey[:-1]+"N",akey

    def saveInRequestCache(self,filename):
        self.requestCache=[filename]+self.requestCache
        #cut the last entry off if we're getting too big
        if len(self.requestCache)==self.requestCacheMaxLength:
            del self.requestCache[-1]

    def log(self,loginfo):
        timeoflog=time.asctime()
        logstring= "[%s] : %s" % (timeoflog,loginfo)
        #print it out to our running string
        print logstring
        self.logs=[logstring]+self.logs
        if len(self.logs)==self.maxlogs:
            del self.logs[-1]

    #makes a request - doesn't fork off a new thread
    #takes in a header and body
    #does handle SSL
    #returns a header and body from the server as a string
    def makeRequest(self,newh,newb):
        """Send the request described by newh and newb; return the result.

        A fresh spkproxy.spkProxyConnection is created for the request, with
        this object passed in as its UI. For SSL requests the connection's
        sslHost and sslPort are taken from the header's connectHost and
        connectPort. The return value is whatever the connection's
        sendRequest() returns.
        """
        conn=spkproxy.spkProxyConnection(None,self)
        usessl=newh.clientisSSL
        conn.clientisSSL=usessl
        if usessl:
            conn.sslHost,conn.sslPort=newh.connectHost,newh.connectPort
        return conn.sendRequest(newh,newb)

    #sets up the triggers for errors messages we detect
    def setupTriggers(self):
        self.scantriggers=[]
        self.scantriggers.append(("ODBC","ODBC Error!"))
        self.scantriggers.append(("Internal Server","Internal Server Error!"))
        self.scantriggers.append(("SQLException","SQL Injection flaw on DB2!"))
        self.scantriggers.append(("SQLSTATE","SQL Injection flaw on DB2!"))
        self.scantriggers.append(("Volume in drive","dir.exe was spawned!"))
        self.scantriggers.append(("Microsoft Windows 2000 [Version","cmd.exe was spawned!"))

    #returns the trigger string that was found in the body, or "" if none was
    #BUGS: we only return one trigger at a time (the last one that matches)
    def scanForTriggers(self,serverheader,serverbody):
        retval=""
        allbody="".join(serverbody.data)
        for triggerstring,triggervalue in self.scantriggers:
            if allbody.count(triggerstring):
                retval=triggerstring

        return retval

