312 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
	
	
	
			
		
		
	
	
			312 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
	
	
	
| #!/usr/bin/env python2
 | |
| # -*- coding: utf-8 -*-
 | |
| 
 | |
| # Copyright (c) 2021 Battelle Energy Alliance, LLC.  All rights reserved.
 | |
| 
 | |
| from __future__ import print_function
 | |
| 
 | |
| import sys
 | |
| import os
 | |
| import re
 | |
| import argparse
 | |
| import struct
 | |
| import ipaddress
 | |
| import itertools
 | |
| import json
 | |
| import pprint
 | |
| import uuid
 | |
| from collections import defaultdict
 | |
| 
 | |
# key used in the tag map for mappings that do not name a required tag
UNSPECIFIED_TAG = '<~<~<none>~>~>'

# positions of the host and segment dictionaries inside each tag map value
HOST_LIST_IDX, SEGMENT_LIST_IDX = 0, 1

# accepted values of the "type" key in a mixed JSON mapping entry
JSON_MAP_TYPE_SEGMENT = 'segment'
JSON_MAP_TYPE_HOST = 'host'

# keys read from a mixed JSON mapping entry
JSON_MAP_KEY_ADDR = 'address'
JSON_MAP_KEY_NAME = 'name'
JSON_MAP_KEY_TAG = 'tag'
JSON_MAP_KEY_TYPE = 'type'
 | |
| 
 | |
| ###################################################################################################
 | |
| # print to stderr
 | |
def eprint(*args, **kwargs):
  """Forward all arguments to print(), writing to sys.stderr instead of stdout."""
  errStream = sys.stderr
  print(*args, file=errStream, **kwargs)
 | |
| 
 | |
| ###################################################################################################
 | |
| # recursively convert unicode strings to utf-8 strings
 | |
def byteify(input):
  """Recursively encode unicode strings found in ``input`` as UTF-8 byte strings.

  Dictionaries are rebuilt with both keys and values converted, lists are
  rebuilt element by element, unicode strings are encoded, and any other value
  is returned as-is.
  """
  if isinstance(input, dict):
    converted = {}
    for key, value in input.iteritems():
      converted[byteify(key)] = byteify(value)
    return converted

  if isinstance(input, list):
    return list(map(byteify, input))

  if isinstance(input, unicode):
    return input.encode('utf-8')

  return input
 | |
| 
 | |
| ###################################################################################################
 | |
| # main
 | |
def main():
  """Generate a Logstash filter configuration from address-to-name mapping files.

  Reads the pipe-delimited segment (-s) and host (-h) mapping files and the
  mixed JSON mapping files (-m) named on the command line, groups their
  addresses by required tag and then by segment/host name, and prints a
  ``filter { ... }`` block to the output file (-o), or to stdout when the
  output is '-' (the default). Nothing is written when no mapping entries
  were read.

  Invalid command-line arguments print the usage text and exit with status 2.
  Malformed mapping lines, invalid addresses and JSON files that cannot be
  loaded are reported on stderr and skipped; segment/host paths that are not
  regular files are skipped silently.
  """

  # extract arguments from the command line
  parser = argparse.ArgumentParser(description='Logstash IP address to Segment Filter Creator', add_help=False, usage='ip-to-segment-logstash.py <arguments>')
  parser.add_argument('-m', '--mixed', dest='mixedInput', metavar='<STR>', type=str, nargs='*', default='', help='Input mixed JSON mapping file(s)')
  parser.add_argument('-s', '--segment', dest='segmentInput', metavar='<STR>', type=str, nargs='*', default='', help='Input segment mapping file(s)')
  parser.add_argument('-h', '--host', dest='hostInput', metavar='<STR>', type=str, nargs='*', default='', help='Input host mapping file(s)')
  parser.add_argument('-o', '--output', dest='output', metavar='<STR>', type=str, default='-', help='Output file')
  try:
    # route argparse errors through exit() so they surface as SystemExit below
    parser.error = parser.exit
    args = parser.parse_args()
  except SystemExit:
    parser.print_help()
    sys.exit(2)

  # read each input file into its own list
  segmentLines = []
  hostLines = []
  mixedEntries = []

  for inFile in args.segmentInput:
    if os.path.isfile(inFile):
      with open(inFile) as f:
        segmentLines.extend([line.strip() for line in f])

  for inFile in args.hostInput:
    if os.path.isfile(inFile):
      with open(inFile) as f:
        hostLines.extend([line.strip() for line in f])

  for inFile in args.mixedInput:
    try:
      with open(inFile, 'r') as f:
        tmpMixedEntries = json.load(f)
    except (IOError, OSError, ValueError) as e:
      # best effort: a missing, unreadable or invalid JSON file is skipped, but say so
      eprint('could not load "{}" ({}), ignoring'.format(inFile, e))
      continue
    if isinstance(tmpMixedEntries, list):
      mixedEntries.extend(byteify(tmpMixedEntries))

  # remove blank lines and comments
  segmentLines = [x for x in segmentLines if x and not x.startswith('#')]
  hostLines = [x for x in hostLines if x and not x.startswith('#')]

  if not (segmentLines or hostLines or mixedEntries):
    # nothing to map: do not create or truncate the output file
    return

  filterId = 0
  addedFields = set()

  outFile = open(args.output, 'w+') if (args.output and args.output != '-') else sys.stdout
  try:
    try:
      print('filter {', file=outFile)
      print("", file=outFile)
      print("  # this file was automatically generated by {}".format(os.path.basename(__file__)), file=outFile)
      print("", file=outFile)

      # process the mappings into a dictionary of two dictionaries of lists (one for hosts, one for segments)
      # eg., tagListMap[required tag name][HOST_LIST_IDX|SEGMENT_LIST_IDX][network segment name] = [172.16.0.0/12, 192.168.0.0/24, 10.0.0.41]
      tagListMap = _build_tag_list_map(segmentLines, hostLines, mixedEntries)

      # go through the lists of segments/hosts, which are now organized by required tag first, then
      # segment/host name, then the list of addresses
      for tag, nameMaps in tagListMap.iteritems():
        print("", file=outFile)

        # if a tag name is specified, print the IF statement verifying the tag's presence
        if tag != UNSPECIFIED_TAG:
          print('  if ("{}" in [tags]) {{'.format(tag), file=outFile)
        try:

          # for the host names(s) to be checked, create two filters, one for source IP|MAC and one for dest IP|MAC
          for hostName, addrList in nameMaps[HOST_LIST_IDX].iteritems():
            # MAC addresses were stored with a temporary '_' prefix; strip it here
            ipList = list(set([a for a in addrList if not a.startswith('_')]))
            macList = list(set([a[1:] for a in addrList if a.startswith('_')]))

            # IP addresses are matched against <source>_h, MAC addresses against <source>_l2_addr
            for addrKind, fieldSuffix, matchValues in (('ip', 'h', ipList), ('mac', 'l2_addr', macList)):
              if not matchValues:
                continue
              for source in ['orig', 'resp']:
                filterId += 1
                fieldName = "{}_{}".format(source, fieldSuffix)
                newFieldName = "{}_hostname".format(source)
                print("", file=outFile)
                print('    if ([zeek][{}]) and ({}) {{ '.format(fieldName, ' or '.join(['([zeek][{}] == "{}")'.format(fieldName, value) for value in matchValues])), file=outFile)
                print('      mutate {{ id => "mutate_add_autogen_{}_{}_hostname_{}"'.format(source, addrKind, filterId), file=outFile)
                print('        add_field => {{ "[zeek][{}]" => "{}" }}'.format(newFieldName, hostName), file=outFile)
                print("      }", file=outFile)
                print("    }", file=outFile)
                addedFields.add("[zeek][{}]".format(newFieldName))

          # for the segment(s) to be checked, create two cidr filters, one for source IP and one for dest IP
          for segmentName, ipList in nameMaps[SEGMENT_LIST_IDX].iteritems():
            ipList = list(set(ipList))
            for source in ['orig', 'resp']:
              filterId += 1
              # ip addresses/ranges mapped to network segment names
              fieldName = "{}_h".format(source)
              newFieldName = "{}_segment".format(source)
              print("", file=outFile)
              print("    if ([zeek][{}]) {{ cidr {{".format(fieldName), file=outFile)
              print('      id => "cidr_autogen_{}_segment_{}"'.format(source, filterId), file=outFile)
              print('      address => [ "%{{[zeek][{}]}}" ]'.format(fieldName), file=outFile)
              print('      network => [ {} ]'.format(', '.join('"{}"'.format(ip) for ip in ipList)), file=outFile)
              print('      add_tag => [ "{}" ]'.format(segmentName), file=outFile)
              print('      add_field => {{ "[zeek][{}]" => "{}" }}'.format(newFieldName, segmentName), file=outFile)
              print("    } }", file=outFile)
              addedFields.add("[zeek][{}]".format(newFieldName))

        finally:
          # if a tag name is specified, close the IF statement verifying the tag's presence
          if tag != UNSPECIFIED_TAG:
            print("", file=outFile)
            print('  }} # end (if "{}" in [tags])'.format(tag), file=outFile)

    finally:
      # deduplicate any added fields
      if addedFields:
        print("", file=outFile)
        print('  # deduplicate any added fields', file=outFile)
        for prefix, suffix in itertools.product(['orig', 'resp'], ['hostname', 'segment']):
          newFieldName = "[zeek][{}_{}]".format(prefix, suffix)
          if newFieldName in addedFields:
            print("", file=outFile)
            print('  if ({}) {{ '.format(newFieldName), file=outFile)
            # NOTE: groupby collapses every run of repeated characters (including the
            # "ee" in "zeek"), not only underscores; kept as-is so generated ids stay stable
            print('    ruby {{ id => "ruby{}deduplicate"'.format(''.join(c for c, _ in itertools.groupby(re.sub('[^0-9a-zA-Z]+', '_', newFieldName)))), file=outFile)
            print('      code => "', file=outFile)
            print("        fieldVals = event.get('{}')".format(newFieldName), file=outFile)
            print("        if fieldVals.kind_of?(Array) then event.set('{}', fieldVals.uniq) end".format(newFieldName), file=outFile)
            print('      "', file=outFile)
            print('  } }', file=outFile)

      # close out filter with ending }
      print("", file=outFile)
      print('} # end Filter', file=outFile)

  finally:
    # always release the output file, even when generation failed part-way
    if outFile is not sys.stdout:
      outFile.close()


# build the required-tag -> [hosts-by-name, segments-by-name] map from all mapping inputs
def _build_tag_list_map(segmentLines, hostLines, mixedEntries):
  """Return the tag map built from segment lines, host lines and mixed JSON entries.

  The result maps a required tag (or UNSPECIFIED_TAG) to a two-element list
  indexed by HOST_LIST_IDX / SEGMENT_LIST_IDX, each element mapping a host or
  segment name to its list of address strings. MAC addresses in the host lists
  carry a temporary '_' prefix. Lines and entries that cannot be used are
  reported on stderr (lines) or skipped (entries) and do not create map keys.
  """
  tagListMap = defaultdict(lambda: [defaultdict(list), defaultdict(list)])

  # NOTE(review): '|' inside the character class is a literal pipe and the match is
  # not anchored at the end, so this accepts more than strict MAC syntax; the
  # pattern is kept unchanged
  macAddrRegex = re.compile(r'([a-fA-F0-9]{2}[:|\-]?){6}')

  def parseHosts(addresses):
    return _parse_host_address_list(addresses, macAddrRegex)

  # Both text formats are "address(es)|name|required tag", where:
  #   address(es): comma-separated list; CIDR networks or IP addresses for segments
  #                (eg., 10.0.0.0/8, 169.254.0.0/16, 172.16.10.41), IPv4, IPv6 or MAC
  #                addresses for hosts (eg., 172.16.10.41, 02:42:45:dc:a2:96)
  #   name: segment or host name to be assigned when event address(es) match
  #   required tag (optional): only check match and apply the name if the event
  #                            contains this tag
  lineSources = (
    (segmentLines, SEGMENT_LIST_IDX, _parse_network_list),
    (hostLines, HOST_LIST_IDX, parseHosts),
  )
  for lines, listIdx, parseAddresses in lineSources:
    for line in lines:
      values = [x.strip() for x in line.split('|')]
      addresses = parseAddresses(values[0]) if len(values) >= 2 else []
      if addresses and values[1]:
        tagReq = values[2] if ((len(values) >= 3) and values[2]) else UNSPECIFIED_TAG
        tagListMap[tagReq][listIdx][values[1]].extend(addresses)
      else:
        eprint('"{}" is not formatted correctly, ignoring'.format(line))

  # handle mixed entries from the JSON-formatted file(s)
  for entry in mixedEntries:

    # the entry must at least contain type, address, name; may optionally contain tag
    if not (isinstance(entry, dict) and
            all(key in entry for key in (JSON_MAP_KEY_TYPE, JSON_MAP_KEY_NAME, JSON_MAP_KEY_ADDR)) and
            entry[JSON_MAP_KEY_TYPE] in (JSON_MAP_TYPE_SEGMENT, JSON_MAP_TYPE_HOST) and
            (len(entry[JSON_MAP_KEY_NAME]) > 0) and
            (len(entry[JSON_MAP_KEY_ADDR]) > 0)):
      continue

    tagReq = entry[JSON_MAP_KEY_TAG] if (JSON_MAP_KEY_TAG in entry) and (len(entry[JSON_MAP_KEY_TAG]) > 0) else UNSPECIFIED_TAG

    # the 'address' value may hold several comma-separated addresses
    if entry[JSON_MAP_KEY_TYPE] == JSON_MAP_TYPE_SEGMENT:
      networkList = _parse_network_list(entry[JSON_MAP_KEY_ADDR])
      if networkList:
        tagListMap[tagReq][SEGMENT_LIST_IDX][entry[JSON_MAP_KEY_NAME]].extend(networkList)
    else:
      addressList = _parse_host_address_list(entry[JSON_MAP_KEY_ADDR], macAddrRegex)
      if addressList:
        tagListMap[tagReq][HOST_LIST_IDX][entry[JSON_MAP_KEY_NAME]].extend(addressList)

  return tagListMap


# parse a comma-separated list of CIDR networks / IP addresses into normalized strings
def _parse_network_list(addresses):
  """Return the valid networks and IP addresses in ``addresses`` as lowercase strings.

  All whitespace is removed before splitting on commas. Items containing '/'
  are parsed as networks, the rest as single IP addresses; items that fail to
  parse are reported on stderr and left out.
  """
  networkList = []
  for addr in ''.join(addresses.split()).split(','):
    try:
      if '/' in addr:
        parsed = ipaddress.ip_network(unicode(addr))
      else:
        parsed = ipaddress.ip_address(unicode(addr))
    except ValueError:
      eprint('"{}" is not a valid IP address, ignoring'.format(addr))
    else:
      networkList.append(str(parsed).lower())
  return networkList


# parse a comma-separated list of IP / MAC addresses into normalized strings
def _parse_host_address_list(addresses, macAddrRegex):
  """Return the valid IP and MAC addresses in ``addresses`` as lowercase strings.

  All whitespace is removed before splitting on commas. Items that are not IP
  addresses are tried against ``macAddrRegex``; matching items are stored with
  '-' replaced by ':' and a temporary '_' prefix marking them as MAC addresses.
  Items that are neither are reported on stderr and left out.
  """
  addressList = []
  for addr in ''.join(addresses.split()).split(','):
    try:
      # see if it's an IP address
      addressList.append(str(ipaddress.ip_address(unicode(addr))).lower())
    except ValueError:
      # see if it's a MAC address
      if macAddrRegex.match(addr):
        # prepend _ temporarily to distinguish a mac address
        addressList.append("_{}".format(addr.replace('-', ':').lower()))
      else:
        eprint('"{}" is not a valid IP or MAC address, ignoring'.format(addr))
  return addressList
 | |
| 
 | |
# run the generator only when this file is executed as a script
if __name__ == '__main__':
  main()