313 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
	
	
	
			
		
		
	
	
			313 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
	
	
	
| #!/usr/bin/env python3
 | |
| # -*- coding: utf-8 -*-
 | |
| 
 | |
| # Copyright (c) 2021 Battelle Energy Alliance, LLC.  All rights reserved.
 | |
| 
 | |
| ###################################################################################################
 | |
| # Process queued files reported by zeek_carve_watcher.py, scanning them with the specified
 | |
| # virus scan engine and sending the results along to zeek_carve_logger.py
 | |
| #
 | |
| # Run the script with --help for options
 | |
| ###################################################################################################
 | |
| 
 | |
| import argparse
 | |
| import os
 | |
| import pathlib
 | |
| import json
 | |
| import signal
 | |
| import sys
 | |
| import threading
 | |
| import time
 | |
| import zmq
 | |
| 
 | |
| from zeek_carve_utils import *
 | |
| from multiprocessing.pool import ThreadPool
 | |
| 
 | |
| ###################################################################################################
 | |
# runtime flags; main() sets the debug flags from the command line and the
# signal handlers below flip debug / pdbFlagged / shuttingDown asynchronously
debug = False
verboseDebug = False
debugToggled = False
pdbFlagged = False
shuttingDown = False

# parsed command-line arguments (assigned in main())
args = None

# where this script lives, what it is called, and where it was launched from
scriptPath = os.path.dirname(os.path.realpath(__file__))
scriptName = os.path.basename(__file__)
origPath = os.getcwd()

# shared counter from which each scanner worker draws its ID
scanWorkersCount = AtomicInt(value=0)
 | |
| 
 | |
| ###################################################################################################
 | |
| # handle sigint/sigterm and set a global shutdown variable
 | |
def shutdown_handler(signum, frame):
  """Signal handler: raise the global shuttingDown flag.

  Both arguments (signal number and current stack frame) are ignored; the
  handler only records that a shutdown was requested so the polling loops
  that test shuttingDown can wind down on their own.
  """
  global shuttingDown
  shuttingDown = True
 | |
| 
 | |
| ###################################################################################################
 | |
| # handle sigusr1 for a pdb breakpoint
 | |
def pdb_handler(sig, frame):
  """Signal handler: raise the global pdbFlagged flag.

  Both arguments (signal number and current stack frame) are ignored; the
  flag is only recorded here and acted upon by whoever polls pdbFlagged.
  """
  global pdbFlagged
  pdbFlagged = True
 | |
| 
 | |
| ###################################################################################################
 | |
| # handle sigusr2 for toggling debug
 | |
def debug_toggle_handler(signum, frame):
  """Signal handler: invert the global debug flag and note that it was toggled.

  Both arguments (signal number and current stack frame) are ignored.
  """
  global debug, debugToggled
  debug = not debug
  debugToggled = True
 | |
| 
 | |
| ###################################################################################################
 | |
| # look for a file to scan (probably in its original directory, but possibly already moved to quarantine)
 | |
def locate_file(fileInfo):
  """Return the path at which a carved file can currently be found, or None.

  fileInfo may be a path string, or a dict holding the path under the
  FILE_SCAN_RESULT_FILE key; anything else yields None.

  The path is returned unchanged if it names an existing file. Otherwise the
  quarantine and preserved subdirectories next to the file's (resolved)
  location are checked, in that order, for a file with the same base name.
  """
  global verboseDebug

  # figure out which path we were asked about
  if isinstance(fileInfo, dict) and (FILE_SCAN_RESULT_FILE in fileInfo):
    wantedPath = fileInfo[FILE_SCAN_RESULT_FILE]
  elif isinstance(fileInfo, str):
    wantedPath = fileInfo
  else:
    return None

  if wantedPath is None:
    return None

  # the common case: the file is still right where it was reported
  if os.path.isfile(wantedPath):
    return wantedPath

  # otherwise it may already have been moved into one of the sibling directories
  for siblingDir in (PRESERVE_QUARANTINED_DIR_NAME, PRESERVE_PRESERVED_DIR_NAME):
    testFileName = os.path.join(os.path.dirname(os.path.realpath(wantedPath)),
                                siblingDir,
                                os.path.basename(wantedPath))
    if not os.path.isfile(testFileName):
      continue
    if verboseDebug: eprint(f"{scriptName}:\t⏩\t{testFileName}")
    return testFileName

  return None
 | |
| 
 | |
| 
 | |
| ###################################################################################################
 | |
def scanFileWorker(checkConnInfo, carvedFileSub):
  """Worker loop: pull carved-file notifications, scan each file, push results to the sink.

  Arguments:
    checkConnInfo -- the scan engine to use; it must be a FileScanProvider,
                     otherwise an error is logged and no work is done
    carvedFileSub -- subscription object whose Pull() hands back the
                     information for the next file to scan

  Before handling files the worker sends its scanner name to the sink over a
  ZeroMQ PUSH socket; if that succeeded, it sends the same name prefixed with
  "-" on the way out. The loop runs until the global shuttingDown flag is set.
  """
  global debug
  global verboseDebug
  global shuttingDown
  global scanWorkersCount

  # every worker draws its ID from the shared counter
  scanWorkerId = scanWorkersCount.increment()
  scannerRegistered = False

  if debug: eprint(f"{scriptName}[{scanWorkerId}]:\tstarted")

  try:
    if not isinstance(checkConnInfo, FileScanProvider):
      eprint(f"{scriptName}[{scanWorkerId}]:\tinvalid scanner provider specified")

    else:
      # ZeroMQ context and the PUSH socket over which results go to the sink
      context = zmq.Context()
      scanned_files_socket = context.socket(zmq.PUSH)
      scanned_files_socket.connect(f"tcp://localhost:{SINK_PORT}")
      # todo: do I want to set this? probably not, since what else would we do if we can't send? just block
      # scanned_files_socket.SNDTIMEO = 5000
      if debug: eprint(f"{scriptName}[{scanWorkerId}]:\tconnected to sink at {SINK_PORT}")

      fileInfo = None
      fileName = None
      retrySubmitFile = False # todo: maximum file retry count?

      # keep going until we're told to shut down
      while not shuttingDown:

        # announce ("register") this scanner to the logger before doing any work
        while not (scannerRegistered or shuttingDown):
          try:
            scanned_files_socket.send_string(json.dumps({FILE_SCAN_RESULT_SCANNER : checkConnInfo.scanner_name()}))
            scannerRegistered = True
            if debug: eprint(f"{scriptName}[{scanWorkerId}]:\t🇷\t{checkConnInfo.scanner_name()}")

          except zmq.Again:
            # todo: what to do here?
            if verboseDebug: eprint(f"{scriptName}[{scanWorkerId}]:\t🕑\t{checkConnInfo.scanner_name()} 🇷")

        if shuttingDown:
          break

        # either retry the file whose submission was refused last time around
        # (as long as it still exists), or fetch the next one from the subscription
        if retrySubmitFile and (fileInfo is not None) and (locate_file(fileInfo) is not None):
          time.sleep(1)
          if debug: eprint(f"{scriptName}[{scanWorkerId}]:\t🔃\t{json.dumps(fileInfo)}")
        else:
          retrySubmitFile = False
          fileInfo = carvedFileSub.Pull(scanWorkerId=scanWorkerId)

        # nothing to do unless the file can actually be found on disk
        fileName = locate_file(fileInfo)
        if (fileName is None) or (not os.path.isfile(fileName)):
          continue

        if debug: eprint(f"{scriptName}[{scanWorkerId}]:\t🔎\t{json.dumps(fileInfo)}")

        # the reported size is only used when it is an int or a decimal string
        reportedSize = fileInfo[FILE_SCAN_RESULT_FILE_SIZE]
        if isinstance(reportedSize, int) or (isinstance(reportedSize, str) and reportedSize.isdecimal()):
          fileSize = int(reportedSize)
        else:
          fileSize = None
        reportedType = fileInfo[FILE_SCAN_RESULT_FILE_TYPE]

        # hand the file to the scan engine without blocking
        submission = checkConnInfo.submit(fileName=fileName, fileSize=fileSize, fileType=reportedType, block=False)
        scan = AnalyzerScan(provider=checkConnInfo, name=fileName,
                            size=fileSize,
                            fileType=reportedType,
                            submissionResponse=submission)

        if scan.submissionResponse is None:
          # we were denied (rate limiting, probably), so come back to this file later
          retrySubmitFile = True
          continue

        # the file was accepted and is now being scanned
        if debug: eprint(f"{scriptName}[{scanWorkerId}]:\t🔍\t{fileName}")
        retrySubmitFile = False
        scanFinished = False
        scanResult = None

        # todo: maximum time we wait for a single file to be scanned?
        while not (scanFinished or shuttingDown):

          # wait a moment, then ask the engine whether the scan is done
          time.sleep(scan.provider.check_interval())
          checkResponse = scan.provider.check_result(scan.submissionResponse)

          if not isinstance(checkResponse, AnalyzerResult):
            # impossibru! abandon ship for this file?
            # todo? what else? touch it?
            scanFinished = True
            scanResult = "Error checking results"
            eprint(f"{scriptName}[{scanWorkerId}]:\t❗{fileName} {scanResult}")
            continue

          scanFinished = checkResponse.finished

          if checkResponse.success:
            # successful scan: the response itself is what gets reported
            scanResult = checkResponse

          elif isinstance(checkResponse.result, dict) and ("error" in checkResponse.result):
            # the scan errored out: report the error
            scanResult = checkResponse.result["error"]
            eprint(f"{scriptName}[{scanWorkerId}]:\t❗\t{fileName} {scanResult}")

          else:
            # the result is unrecognizable
            scanResult = "Invalid scan result format"
            eprint(f"{scriptName}[{scanWorkerId}]:\t❗\t{fileName} {scanResult}")

        # only a finished scan with something to report is sent to the sink
        if scanFinished and (scanResult is not None):
          try:
            scanned_files_socket.send_string(json.dumps(scan.provider.format(fileName, scanResult)))
            if debug: eprint(f"{scriptName}[{scanWorkerId}]:\t✅\t{fileName}")

          except zmq.Again:
            # todo: what to do here?
            if verboseDebug: eprint(f"{scriptName}[{scanWorkerId}]:\t🕑\t{fileName}")

  finally:
    # "unregister" this scanner with the logger, if it ever registered
    if scannerRegistered:
      try:
        scanned_files_socket.send_string(json.dumps({FILE_SCAN_RESULT_SCANNER : f"-{checkConnInfo.scanner_name()}"}))
        scannerRegistered = False
        if debug: eprint(f"{scriptName}[{scanWorkerId}]:\t🙃\t{checkConnInfo.scanner_name()}")
      except zmq.Again:
        # todo: what to do here?
        if verboseDebug: eprint(f"{scriptName}[{scanWorkerId}]:\t🕑\t{checkConnInfo.scanner_name()} 🙃")

  if debug: eprint(f"{scriptName}[{scanWorkerId}]:\tfinished")
 | |
| 
 | |
| ###################################################################################################
 | |
| # main
 | |
def main():
  """Parse the command line, pick a scan engine, and run scanner workers until shutdown.

  Exactly one engine is chosen, in this order of precedence: Malass (when a
  host is given), VirusTotal (when an API key is given), Yara, Capa, and
  finally ClamAV as the fallback. Worker threads running scanFileWorker are
  then started, and this function idles until SIGINT/SIGTERM sets the global
  shuttingDown flag. SIGUSR1 requests a debugger breakpoint in this thread and
  SIGUSR2 toggles the debug flag.

  Exits with status 2 (after printing the help text) if the arguments cannot
  be parsed.
  """
  global args
  global debug
  global debugToggled
  global pdbFlagged
  global shuttingDown
  global verboseDebug

  parser = argparse.ArgumentParser(description=scriptName, add_help=False, usage='{} <arguments>'.format(scriptName))
  parser.add_argument('-v', '--verbose', dest='debug', help="Verbose output", metavar='true|false', type=str2bool, nargs='?', const=True, default=False, required=False)
  parser.add_argument('--extra-verbose', dest='verboseDebug', help="Super verbose output", metavar='true|false', type=str2bool, nargs='?', const=True, default=False, required=False)
  parser.add_argument('--start-sleep', dest='startSleepSec', help="Sleep for this many seconds before starting", metavar='<seconds>', type=int, default=0, required=False)
  parser.add_argument('--req-limit', dest='reqLimit', help="Requests limit", metavar='<requests>', type=int, default=None, required=False)
  parser.add_argument('--malass-host', dest='malassHost', help="Malass host or IP address", metavar='<host>', type=str, required=False)
  parser.add_argument('--malass-port', dest='malassPort', help="Malass web interface port", metavar='<port>', type=int, default=80, required=False)
  parser.add_argument('--vtot-api', dest='vtotApi', help="VirusTotal API key", metavar='<API key>', type=str, required=False)
  parser.add_argument('--clamav', dest='enableClamAv', metavar='true|false', help="Enable ClamAV", type=str2bool, nargs='?', const=True, default=False, required=False)
  parser.add_argument('--clamav-socket', dest='clamAvSocket', help="ClamAV socket filename", metavar='<filespec>', type=str, required=False, default=None)
  parser.add_argument('--yara', dest='enableYara', metavar='true|false', help="Enable Yara", type=str2bool, nargs='?', const=True, default=False, required=False)
  parser.add_argument('--yara-custom-only', dest='yaraCustomOnly', metavar='true|false', help="Ignore default Yara rules", type=str2bool, nargs='?', const=True, default=False, required=False)
  parser.add_argument('--capa', dest='enableCapa', metavar='true|false', help="Enable Capa", type=str2bool, nargs='?', const=True, default=False, required=False)
  parser.add_argument('--capa-rules', dest='capaRulesDir', help="Capa Rules Directory", metavar='<pathspec>', type=str, required=False)
  parser.add_argument('--capa-verbose', dest='capaVerbose', metavar='true|false', help="Log all capa rules, not just MITRE ATT&CK technique classifications", type=str2bool, nargs='?', const=True, default=False, required=False)

  try:
    # route argparse's error path through exit() so a bad command line lands in
    # the handler below, which prints the full help text instead of the terse usage
    parser.error = parser.exit
    args = parser.parse_args()
  except SystemExit:
    parser.print_help()
    # sys.exit rather than the site-provided exit(), which is not guaranteed to exist
    sys.exit(2)

  verboseDebug = args.verboseDebug
  debug = args.debug or verboseDebug
  if debug:
    eprint(os.path.join(scriptPath, scriptName))
    eprint("{} arguments: {}".format(scriptName, sys.argv[1:]))
    eprint("{} arguments: {}".format(scriptName, args))
  else:
    # keep uncaught-exception output short when not debugging
    sys.tracebacklimit = 0

  # handle sigint and sigterm for graceful shutdown,
  # sigusr1 for a pdb breakpoint and sigusr2 for toggling debug
  signal.signal(signal.SIGINT, shutdown_handler)
  signal.signal(signal.SIGTERM, shutdown_handler)
  signal.signal(signal.SIGUSR1, pdb_handler)
  signal.signal(signal.SIGUSR2, debug_toggle_handler)

  # sleep for a bit if requested (one second at a time, so a shutdown request cuts it short)
  sleepCount = 0
  while (not shuttingDown) and (sleepCount < args.startSleepSec):
    time.sleep(1)
    sleepCount += 1

  # initialize objects for virus scanning engines
  if (isinstance(args.malassHost, str) and (len(args.malassHost) > 1)):
    checkConnInfo = MalassScan(args.malassHost, args.malassPort, reqLimit=args.reqLimit)
  elif (isinstance(args.vtotApi, str) and (len(args.vtotApi) > 1) and ((args.reqLimit is None) or (args.reqLimit > 0))):
    # the request limit lives in args.reqLimit (--req-limit); there is no
    # "vtotReqLimit" argument, so reading that attribute raised AttributeError.
    # --req-limit defaults to None (presumably "let the provider pick its own
    # limit" -- TODO confirm), so only an explicit non-positive limit skips VirusTotal.
    checkConnInfo = VirusTotalSearch(args.vtotApi, reqLimit=args.reqLimit)
  elif args.enableYara:
    yaraDirs = []
    if (not args.yaraCustomOnly):
      yaraDirs.append(YARA_RULES_DIR)
    yaraDirs.append(YARA_CUSTOM_RULES_DIR)
    checkConnInfo = YaraScan(debug=debug, verboseDebug=verboseDebug, rulesDirs=yaraDirs, reqLimit=args.reqLimit)
  elif args.enableCapa:
    checkConnInfo = CapaScan(debug=debug, verboseDebug=verboseDebug, rulesDir=args.capaRulesDir, verboseHits=args.capaVerbose, reqLimit=args.reqLimit)
  else:
    if not args.enableClamAv:
      eprint('No scanner specified, defaulting to ClamAV')
    checkConnInfo = ClamAVScan(debug=debug, verboseDebug=verboseDebug, socketFileName=args.clamAvSocket, reqLimit=args.reqLimit)

  carvedFileSub = CarvedFileSubscriberThreaded(debug=debug, verboseDebug=verboseDebug,
                                               host='localhost', port=VENTILATOR_PORT,
                                               scriptName=scriptName)

  # start scanner threads which will pull filenames to be scanned and send the results to the logger;
  # scanFileWorker is passed as the pool's initializer, so each pool thread runs it on startup
  scannerThreads = ThreadPool(checkConnInfo.max_requests(), scanFileWorker, ([checkConnInfo, carvedFileSub]))
  while (not shuttingDown):
    if pdbFlagged:
      pdbFlagged = False
      breakpoint()
    time.sleep(0.2)

  # graceful shutdown: give the workers a few seconds to notice the flag and finish up
  if debug: eprint(f"{scriptName}: shutting down...")
  time.sleep(5)
 | |
| 
 | |
# only run the scanner when executed as a script, not when imported
if __name__ == "__main__":
  main()
 |