#!/usr/bin/env python2
# -*- coding: utf-8 -*-

# Copyright (c) 2021 Battelle Energy Alliance, LLC. All rights reserved.

from __future__ import print_function

import sys
import os
import re
import argparse
import struct
import ipaddress
import itertools
import json
import pprint
import uuid
from collections import defaultdict

# key used in the tag map for mappings that do not require any tag on the event
UNSPECIFIED_TAG = '<~<~~>~>'

# positions of the host map and the segment map inside each tag map value
HOST_LIST_IDX = 0
SEGMENT_LIST_IDX = 1

# keys and values recognized in the mixed JSON mapping file(s)
JSON_MAP_TYPE_SEGMENT = 'segment'
JSON_MAP_TYPE_HOST = 'host'
JSON_MAP_KEY_ADDR = 'address'
JSON_MAP_KEY_NAME = 'name'
JSON_MAP_KEY_TAG = 'tag'
JSON_MAP_KEY_TYPE = 'type'

# the ipaddress functions are handed text strings: `unicode` on Python 2, `str` on Python 3
_PY2 = sys.version_info[0] == 2
try:
    _TEXT_TYPE = unicode
except NameError:
    _TEXT_TYPE = str

# matched with .match(), so it is anchored at the start of the string only
_MAC_ADDR_REGEX = re.compile(r'([a-fA-F0-9]{2}[:|\-]?){6}')


###################################################################################################
# print to stderr
def eprint(*args, **kwargs):
    """Print to stderr; accepts the same arguments as print() except `file`."""
    print(*args, file=sys.stderr, **kwargs)


###################################################################################################
# recursively convert unicode strings to utf-8 strings
def byteify(input):
    """Recursively convert unicode strings in dicts/lists to UTF-8 byte strings.

    On Python 2 every `unicode` key, value and element is encoded to a UTF-8 `str`.
    On Python 3 text is returned unchanged, because the rest of this script
    formats and compares native `str` values.  Any other object is returned as is.
    """
    if isinstance(input, dict):
        return {byteify(key): byteify(value) for key, value in input.items()}
    elif isinstance(input, list):
        return [byteify(element) for element in input]
    elif _PY2 and isinstance(input, _TEXT_TYPE):
        return input.encode('utf-8')
    else:
        return input


###################################################################################################
# read the non-empty, non-comment lines of each mapping file that exists
def _readMappingLines(paths):
    """Return the stripped lines of every existing file in `paths`, minus blanks and # comments."""
    lines = []
    for inFile in paths:
        if os.path.isfile(inFile):
            with open(inFile) as f:
                lines.extend(line.strip() for line in f)
    return [x for x in lines if (len(x) > 0) and (not x.startswith('#'))]


###################################################################################################
# read the entries of each mixed JSON mapping file that exists
def _readMixedEntries(paths):
    """Return the concatenated entries of every JSON file in `paths` holding a top-level list.

    Missing files are skipped silently (as for the other inputs); unreadable or
    malformed files are reported on stderr and skipped.
    """
    entries = []
    for inFile in paths:
        if not os.path.isfile(inFile):
            continue
        try:
            with open(inFile, 'r') as f:
                loaded = json.load(f)
        except (IOError, OSError, ValueError) as e:
            eprint('"{}" could not be read as JSON ({}), ignoring'.format(inFile, e))
            continue
        if isinstance(loaded, list):
            entries.extend(byteify(loaded))
        else:
            eprint('"{}" does not contain a JSON list, ignoring'.format(inFile))
    return entries


###################################################################################################
# split a comma-separated address field, discarding all whitespace
def _splitAddresses(text):
    """Remove all whitespace from `text` and split it on commas."""
    return ''.join(text.split()).split(',')


###################################################################################################
# normalize an IP address or CIDR-formatted network
def _parseNetwork(addr):
    """Return the lower-cased canonical form of a CIDR network (if `addr` contains '/')
    or of a single IP address; raises ValueError when `addr` is neither."""
    text = _TEXT_TYPE(addr)
    if '/' in addr:
        return str(ipaddress.ip_network(text)).lower()
    return str(ipaddress.ip_address(text)).lower()


###################################################################################################
# normalize an IP or MAC address
def _parseHostAddress(addr):
    """Return the lower-cased IP address, or the MAC address prefixed with '_'
    (the prefix temporarily distinguishes MACs from IPs); None when `addr` is neither."""
    try:
        return str(ipaddress.ip_address(_TEXT_TYPE(addr))).lower()
    except ValueError:
        if _MAC_ADDR_REGEX.match(addr):
            return "_{}".format(addr.replace('-', ':').lower())
    return None


###################################################################################################
# parse one line of a segment mapping file into tagListMap
def _addSegmentLine(line, tagListMap):
    """Record one "IP(s)|segment name|required tag" line.

    IP(s): comma-separated list of CIDR-formatted network IP addresses
           eg., 10.0.0.0/8, 169.254.0.0/16, 172.16.10.41
    segment name: segment name to be assigned when event IP address(es) match
    required tag (optional): only check match and apply segment name if the event
                             contains this tag
    """
    values = [x.strip() for x in line.split('|')]
    if len(values) < 2:
        eprint('"{}" is not formatted correctly, ignoring'.format(line))
        return

    networkList = []
    for ip in _splitAddresses(values[0]):
        try:
            networkList.append(_parseNetwork(ip))
        except ValueError:
            eprint('"{}" is not a valid IP address, ignoring'.format(ip))

    segmentName = values[1]
    tagReq = values[2] if ((len(values) >= 3) and (len(values[2]) > 0)) else UNSPECIFIED_TAG
    if (len(networkList) > 0) and (len(segmentName) > 0):
        tagListMap[tagReq][SEGMENT_LIST_IDX][segmentName].extend(networkList)
    else:
        eprint('"{}" is not formatted correctly, ignoring'.format(line))


###################################################################################################
# parse one line of a host mapping file into tagListMap
def _addHostLine(line, tagListMap):
    """Record one "address|host name|required tag" line.

    address: comma-separated list of IPv4, IPv6, or MAC addresses
             eg., 172.16.10.41, 02:42:45:dc:a2:96, 2001:0db8:85a3:0000:0000:8a2e:0370:7334
    host name: host name to be assigned when event address(es) match
    required tag (optional): only check match and apply host name if the event
                             contains this tag
    """
    values = [x.strip() for x in line.split('|')]
    if len(values) < 2:
        eprint('"{}" is not formatted correctly, ignoring'.format(line))
        return

    addressList = []
    for addr in _splitAddresses(values[0]):
        parsed = _parseHostAddress(addr)
        if parsed is None:
            # report the address actually being examined
            eprint('"{}" is not a valid IP or MAC address, ignoring'.format(addr))
        else:
            addressList.append(parsed)

    hostName = values[1]
    tagReq = values[2] if ((len(values) >= 3) and (len(values[2]) > 0)) else UNSPECIFIED_TAG
    if (len(addressList) > 0) and (len(hostName) > 0):
        tagListMap[tagReq][HOST_LIST_IDX][hostName].extend(addressList)
    else:
        eprint('"{}" is not formatted correctly, ignoring'.format(line))


###################################################################################################
# parse one entry of a mixed JSON mapping file into tagListMap
def _addMixedEntry(entry, tagListMap):
    """Record one JSON entry; entries that are not usable are skipped without a message.

    The entry must at least contain type, address, name; may optionally contain tag.
    """
    usable = (isinstance(entry, dict) and
              all(key in entry for key in (JSON_MAP_KEY_TYPE, JSON_MAP_KEY_NAME, JSON_MAP_KEY_ADDR)) and
              entry[JSON_MAP_KEY_TYPE] in (JSON_MAP_TYPE_SEGMENT, JSON_MAP_TYPE_HOST) and
              (len(entry[JSON_MAP_KEY_NAME]) > 0) and
              (len(entry[JSON_MAP_KEY_ADDR]) > 0))
    if not usable:
        return

    addressList = []
    networkList = []
    hasTag = (JSON_MAP_KEY_TAG in entry) and (len(entry[JSON_MAP_KEY_TAG]) > 0)
    tagReq = entry[JSON_MAP_KEY_TAG] if hasTag else UNSPECIFIED_TAG
    isSegment = (entry[JSON_MAP_KEY_TYPE] == JSON_MAP_TYPE_SEGMENT)

    # account for comma-separated multiple addresses per 'address' value
    for addr in _splitAddresses(entry[JSON_MAP_KEY_ADDR]):
        if isSegment:
            # potentially interpret address as a CIDR-formatted subnet
            try:
                networkList.append(_parseNetwork(addr))
            except ValueError:
                eprint('"{}" is not a valid IP address, ignoring'.format(addr))
        else:
            # should be an IP or MAC address
            parsed = _parseHostAddress(addr)
            if parsed is None:
                # report the address actually being examined
                eprint('"{}" is not a valid IP or MAC address, ignoring'.format(addr))
            else:
                addressList.append(parsed)

    if (len(networkList) > 0):
        tagListMap[tagReq][SEGMENT_LIST_IDX][entry[JSON_MAP_KEY_NAME]].extend(networkList)
    if (len(addressList) > 0):
        tagListMap[tagReq][HOST_LIST_IDX][entry[JSON_MAP_KEY_NAME]].extend(addressList)


###################################################################################################
# write the source and destination hostname filters for one host name
def _writeHostnameFilters(outFile, hostName, addresses, fieldSuffix, idKind, filterIds, addedFields):
    """Write two mutate filters (orig and resp) adding `hostName` when the event's
    [zeek][<source>_<fieldSuffix>] equals any of `addresses`.

    `idKind` ('ip' or 'mac') and the next value of `filterIds` form the filter id.
    NOTE: names and addresses are inserted into the output verbatim, without escaping.
    """
    for source in ['orig', 'resp']:
        filterId = next(filterIds)
        fieldName = "{}_{}".format(source, fieldSuffix)
        newFieldName = "{}_hostname".format(source)
        conditions = ' or '.join(['([zeek][{}] == "{}")'.format(fieldName, a) for a in addresses])
        print("", file=outFile)
        print(' if ([zeek][{}]) and ({}) {{ '.format(fieldName, conditions), file=outFile)
        print(' mutate {{ id => "mutate_add_autogen_{}_{}_hostname_{}"'.format(source, idKind, filterId), file=outFile)
        print(' add_field => {{ "[zeek][{}]" => "{}" }}'.format(newFieldName, hostName), file=outFile)
        print(" }", file=outFile)
        print(" }", file=outFile)
        addedFields.add("[zeek][{}]".format(newFieldName))


###################################################################################################
# write the source and destination cidr filters for one segment name
def _writeSegmentFilters(outFile, segmentName, ipList, filterIds, addedFields):
    """Write two cidr filters (orig and resp) tagging the event with `segmentName`
    when [zeek][<source>_h] falls inside any of the networks in `ipList`."""
    for source in ['orig', 'resp']:
        filterId = next(filterIds)
        # ip addresses/ranges mapped to network segment names
        fieldName = "{}_h".format(source)
        newFieldName = "{}_segment".format(source)
        print("", file=outFile)
        print(" if ([zeek][{}]) {{ cidr {{".format(fieldName), file=outFile)
        print(' id => "cidr_autogen_{}_segment_{}"'.format(source, filterId), file=outFile)
        print(' address => [ "%{{[zeek][{}]}}" ]'.format(fieldName), file=outFile)
        print(' network => [ {} ]'.format(', '.join('"{}"'.format(ip) for ip in ipList)), file=outFile)
        print(' add_tag => [ "{}" ]'.format(segmentName), file=outFile)
        print(' add_field => {{ "[zeek][{}]" => "{}" }}'.format(newFieldName, segmentName), file=outFile)
        print(" } }", file=outFile)
        addedFields.add("[zeek][{}]".format(newFieldName))


###################################################################################################
# write every filter belonging to one required tag
def _writeTagFilters(outFile, tag, nameMaps, filterIds, addedFields):
    """Write the host and segment filters of `nameMaps`, wrapped in an
    `if ("<tag>" in [tags])` statement unless `tag` is UNSPECIFIED_TAG."""
    tagged = (tag != UNSPECIFIED_TAG)

    print("", file=outFile)
    # if a tag name is specified, print the IF statement verifying the tag's presence
    if tagged:
        print(' if ("{}" in [tags]) {{'.format(tag), file=outFile)
    try:
        # for the host names(s) to be checked, create two filters, one for source IP|MAC and one for dest IP|MAC
        for hostName, addrList in nameMaps[HOST_LIST_IDX].items():
            # ip addresses mapped to hostname
            ipList = list(set([a for a in addrList if not a.startswith('_')]))
            if (len(ipList) >= 1):
                _writeHostnameFilters(outFile, hostName, ipList, 'h', 'ip', filterIds, addedFields)

            # mac addresses mapped to hostname (drop the temporary _ prefix)
            macList = list(set([a for a in addrList if a.startswith('_')]))
            if (len(macList) >= 1):
                _writeHostnameFilters(outFile, hostName, [mac[1:] for mac in macList],
                                      'l2_addr', 'mac', filterIds, addedFields)

        # for the segment(s) to be checked, create two cidr filters, one for source IP and one for dest IP
        for segmentName, ipList in nameMaps[SEGMENT_LIST_IDX].items():
            _writeSegmentFilters(outFile, segmentName, list(set(ipList)), filterIds, addedFields)
    finally:
        # if a tag name is specified, close the IF statement verifying the tag's presence
        if tagged:
            print("", file=outFile)
            print(' }} # end (if "{}" in [tags])'.format(tag), file=outFile)


###################################################################################################
# write the ruby filters that deduplicate the added fields
def _writeDeduplication(outFile, addedFields):
    """Write one ruby filter per field in `addedFields` that replaces an Array value
    with its unique elements."""
    if not addedFields:
        return

    print("", file=outFile)
    print(' # deduplicate any added fields', file=outFile)
    for field in itertools.product(['orig', 'resp'], ['hostname', 'segment']):
        newFieldName = "[zeek][{}_{}]".format(field[0], field[1])
        if newFieldName in addedFields:
            # groupby collapses every run of repeated characters, letters included
            # (eg., "zeek" becomes "zek"); kept as is so generated ids stay stable
            rubyId = ''.join(c for c, _ in itertools.groupby(re.sub('[^0-9a-zA-Z]+', '_', newFieldName)))
            print("", file=outFile)
            print(' if ({}) {{ '.format(newFieldName), file=outFile)
            print(' ruby {{ id => "ruby{}deduplicate"'.format(rubyId), file=outFile)
            print(' code => "', file=outFile)
            print(" fieldVals = event.get('{}')".format(newFieldName), file=outFile)
            print(" if fieldVals.kind_of?(Array) then event.set('{}', fieldVals.uniq) end".format(newFieldName), file=outFile)
            print(' "', file=outFile)
            print(' } }', file=outFile)


###################################################################################################
# main
def main():
    """Generate a Logstash filter from the mapping files named on the command line.

    Reads segment (-s), host (-h) and mixed JSON (-m) mapping files and writes the
    generated filter to the -o file, or to stdout when -o is omitted or '-'.
    Nothing is written when no usable mapping is found.  Exits with status 2
    (after printing the help) when the arguments cannot be parsed.
    """
    # extract arguments from the command line
    # print (sys.argv[1:]);
    # add_help=False because -h is used for the host mapping file(s)
    parser = argparse.ArgumentParser(description='Logstash IP address to Segment Filter Creator',
                                     add_help=False, usage='ip-to-segment-logstash.py ')
    parser.add_argument('-m', '--mixed', dest='mixedInput', metavar='', type=str, nargs='*',
                        default='', help='Input mixed JSON mapping file(s)')
    parser.add_argument('-s', '--segment', dest='segmentInput', metavar='', type=str, nargs='*',
                        default='', help='Input segment mapping file(s)')
    parser.add_argument('-h', '--host', dest='hostInput', metavar='', type=str, nargs='*',
                        default='', help='Input host mapping file(s)')
    parser.add_argument('-o', '--output', dest='output', metavar='', type=str,
                        default='-', help='Output file')
    try:
        # make argument errors raise SystemExit so the full help is printed below
        parser.error = parser.exit
        args = parser.parse_args()
    except SystemExit:
        parser.print_help()
        sys.exit(2)

    # read each input file into its own list (comments and blank lines removed)
    segmentLines = _readMappingLines(args.segmentInput)
    hostLines = _readMappingLines(args.hostInput)
    mixedEntries = _readMixedEntries(args.mixedInput)

    if not (segmentLines or hostLines or mixedEntries):
        return

    filterIds = itertools.count(1)
    addedFields = set()

    outFile = open(args.output, 'w+') if (args.output and args.output != '-') else sys.stdout
    try:
        try:
            print('filter {', file=outFile)
            print("", file=outFile)
            print(" # this file was automatically generated by {}".format(os.path.basename(__file__)), file=outFile)
            print("", file=outFile)

            # process segment mappings into a dictionary of two dictionaries of lists (one for hosts, one for segments)
            # eg., tagListMap[required tag name][HOST_LIST_IDX|SEGMENT_LIST_IDX][network segment name] = [172.16.0.0/12, 192.168.0.0/24, 10.0.0.41]
            tagListMap = defaultdict(lambda: [defaultdict(list), defaultdict(list)])

            # handle segment mappings
            for line in segmentLines:
                _addSegmentLine(line, tagListMap)

            # handle hostname mappings
            for line in hostLines:
                _addHostLine(line, tagListMap)

            # handle mixed entries from the JSON-formatted file
            for entry in mixedEntries:
                _addMixedEntry(entry, tagListMap)

            # go through the lists of segments/hosts, which will now be organized by required tag first, then
            # segment/host name, then the list of addresses
            for tag, nameMaps in tagListMap.items():
                _writeTagFilters(outFile, tag, nameMaps, filterIds, addedFields)

        finally:
            # deduplicate any added fields
            _writeDeduplication(outFile, addedFields)

            # close out filter with ending }
            print("", file=outFile)
            print('} # end Filter', file=outFile)

    finally:
        # close the output file even if writing the trailer failed
        if outFile is not sys.stdout:
            outFile.close()


if __name__ == '__main__':
    main()