140 lines
		
	
	
		
			5.0 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
	
	
	
			
		
		
	
	
			140 lines
		
	
	
		
			5.0 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
	
	
	
| #!/usr/bin/env python3
 | |
| 
 | |
| # Copyright (c) 2021 Battelle Energy Alliance, LLC.  All rights reserved.
 | |
| 
 | |
| 
 | |
| import os
 | |
| from os.path import splitext
 | |
| from tempfile import gettempdir
 | |
| import errno
 | |
| import time
 | |
| import fcntl
 | |
| import fnmatch
 | |
| import magic
 | |
| import json
 | |
| import pprint
 | |
| import re
 | |
| from subprocess import Popen, PIPE
 | |
| 
 | |
# Lock file (named after this script) that keeps two instances from running at once.
lockFilename = os.path.join(gettempdir(), os.path.basename(__file__) + '.lock')

# Base zeek log directory; joining with '' ensures a trailing path separator.
broDir = os.path.join(os.environ.get('FILEBEAT_ZEEK_DIR', "/data/zeek/"), '')

# Age thresholds, configured in minutes and stored in seconds.
# pruneFiles() treats a value <= 0 as "disabled" for that file category.
cleanLogSeconds = 60 * int(os.environ.get('FILEBEAT_LOG_CLEANUP_MINUTES', "30"))
cleanZipSeconds = 60 * int(os.environ.get('FILEBEAT_ZIP_CLEANUP_MINUTES', "120"))

# Filebeat registry file consulted before deleting anything.
fbRegFilename = os.environ.get('FILEBEAT_REGISTRY_FILE', "/usr/share/filebeat/data/registry/filebeat/data.json")

# Sub-directories of broDir that pruneFiles() works on.
currentDir = "{}current/".format(broDir)
processedDir = "{}processed/".format(broDir)
 | |
| 
 | |
| import os, errno
 | |
| 
 | |
def silentRemove(filename):
  """Best-effort removal of a file, symlink, or empty directory.

  Regular files and symlinks (including dangling ones) are unlinked;
  directories are removed with os.rmdir, which only succeeds when they are
  empty. Paths that are none of these are left alone. Any OSError raised
  along the way is deliberately swallowed, so the call never fails because
  the path is missing, busy, or non-empty.
  """
  try:
    unlinkable = os.path.isfile(filename) or os.path.islink(filename)
    if unlinkable:
      remover = os.remove
    elif os.path.isdir(filename):
      remover = os.rmdir
    else:
      # nothing we know how to remove
      return
    remover(filename)
  except OSError:
    pass
 | |
| 
 | |
def _trackedFileIds(registry):
  """Return the set of (device, inode) pairs recorded in a filebeat registry.

  registry is the parsed registry JSON, iterated as a sequence of entries.
  Entries whose 'FileStateOS' value is missing or empty are ignored instead
  of raising KeyError.
  """
  tracked = set()
  for entry in registry:
    osState = entry.get('FileStateOS')
    if osState:
      tracked.add((osState.get('device'), osState.get('inode')))
  return tracked


def _isFileInUse(filename):
  """Return True when `fuser -s` exits with status 0 for filename."""
  fuserProcess = Popen(["fuser", "-s", filename], stdout=PIPE)
  fuserProcess.communicate()
  return fuserProcess.wait() == 0


def pruneFiles():
  """Delete stale files, dead symlinks, and old empty directories.

  Does nothing when both cleanLogSeconds and cleanZipSeconds are <= 0.
  Otherwise it:

  1. walks processedDir and removes each file that is not listed in the
     filebeat registry (matched by device/inode), is not held open by any
     process according to fuser, and whose newest ctime/mtime is at least
     as old as the threshold for its MIME type (plain-text logs use
     cleanLogSeconds, archives use cleanZipSeconds; other types are kept);
  2. removes dangling symlinks found directly in currentDir;
  3. removes directories under processedDir that are empty and at least as
     old as the smaller enabled threshold.

  Files that disappear or cannot be inspected while the scan is running are
  reported and skipped rather than aborting the whole run.
  """
  if (cleanLogSeconds <= 0) and (cleanZipSeconds <= 0):
    # disabled, don't do anything
    return

  nowTime = time.time()

  logMimeType = "text/plain"
  archiveMimeTypeRegex = re.compile(r"(application/gzip|application/x-gzip|application/x-7z-compressed|application/x-bzip2|application/x-cpio|application/x-lzip|application/x-lzma|application/x-rar-compressed|application/x-tar|application/x-xz|application/zip)")

  # look for regular files in the processed/ directory
  foundFiles = [os.path.join(root, filename) for root, _, filenames in os.walk(processedDir) for filename in filenames]

  # read the filebeat registry once and index it by (device, inode) so that
  # each file costs a single set lookup instead of a scan of every entry
  trackedIds = None
  if os.path.isfile(fbRegFilename):
    with open(fbRegFilename) as f:
      trackedIds = _trackedFileIds(json.load(f))

  # see if the files we found are in use and old enough to be pruned
  for file in foundFiles:

    # first check to see if it's in the filebeat registry
    if trackedIds is not None:
      try:
        fileStatInfo = os.stat(file)
      except OSError as e:
        # vanished since the walk, or a broken symlink
        print('skipping "{}" ({})'.format(file, e))
        continue
      if (fileStatInfo.st_dev, fileStatInfo.st_ino) in trackedIds:
        # found a file in the filebeat registry, so leave it alone!
        # we only want to delete files that filebeat has forgotten
        continue

    # now see if the file is in use by any other process in the system
    if _isFileInUse(file):
      continue

    # the file is not in use, let's check its mtime/ctime and its type
    try:
      logTime = max(os.path.getctime(file), os.path.getmtime(file))
      fileType = magic.from_file(file, mime=True)
    except OSError as e:
      print('skipping "{}" ({})'.format(file, e))
      continue
    lastUseTime = nowTime - logTime

    if (cleanLogSeconds > 0) and (fileType == logMimeType):
      cleanSeconds = cleanLogSeconds
    elif (cleanZipSeconds > 0) and archiveMimeTypeRegex.match(fileType) is not None:
      cleanSeconds = cleanZipSeconds
    else:
      # not a file we're going to be messing with
      cleanSeconds = 0

    if (cleanSeconds > 0) and (lastUseTime >= cleanSeconds):
      # this is a closed file that is old, so delete it
      print('removing old file "{}" ({}, used {} seconds ago)'.format(file, fileType, lastUseTime))
      silentRemove(file)

  # clean up any broken symlinks in the current/ directory (if it exists)
  if os.path.isdir(currentDir):
    for current in os.listdir(currentDir):
      currentFileSpec = os.path.join(currentDir, current)
      if os.path.islink(currentFileSpec) and not os.path.exists(currentFileSpec):
        print('removing dead symlink "{}"'.format(currentFileSpec))
        silentRemove(currentFileSpec)

  # clean up any old and empty directories in processed/ directory
  # (the early return above guarantees at least one threshold is > 0)
  cleanDirSeconds = min(i for i in (cleanLogSeconds, cleanZipSeconds) if i > 0)
  candidateDirs = set()
  for root, dirs, _ in os.walk(processedDir, topdown=False):
    candidateDirs.update(os.path.join(root, tmpDir) for tmpDir in dirs)

  # longest paths first (ties in reverse lexical order) so that nested
  # directories are tried before the directories that contain them
  orderedDirs = sorted(candidateDirs, key=lambda d: (len(d), d), reverse=True)

  # sample every mtime before removing anything, so the ages used below are
  # the ones observed before this pass started deleting directories
  candidateDirsAndTimes = []
  for dirToRm in orderedDirs:
    try:
      candidateDirsAndTimes.append((dirToRm, os.path.getmtime(dirToRm)))
    except OSError:
      # directory disappeared in the meantime; nothing left to clean
      continue

  for (dirToRm, dirTime) in candidateDirsAndTimes:
    dirAge = (nowTime - dirTime)
    if (dirAge >= cleanDirSeconds):
      try:
        os.rmdir(dirToRm)
        print('removed empty directory "{}" (used {} seconds ago)'.format(dirToRm, dirAge))
      except OSError:
        # rmdir fails on non-empty directories; those are meant to be kept
        pass
 | |
| 
 | |
def main():
  """Run pruneFiles() unless another instance already holds the lock.

  A non-blocking exclusive flock on lockFilename serializes runs. If the
  lock cannot be taken, the function returns without touching the lock
  file, because that file belongs to the instance currently running;
  deleting it would let yet another instance create a fresh file and run
  concurrently. The lock file is removed only by the instance that held
  the lock, after pruneFiles() finishes or raises.
  """
  with open(lockFilename, 'w') as lock_file:
    try:
      fcntl.flock(lock_file, fcntl.LOCK_EX | fcntl.LOCK_NB)
    except OSError:
      # someone else owns the lock (or locking failed): leave their file alone
      return

    try:
      pruneFiles()
    finally:
      try:
        os.remove(lockFilename)
      except OSError:
        # already gone; nothing to clean up
        pass


if __name__ == '__main__':
  main()
 |