312 lines
14 KiB
Python
Executable File
312 lines
14 KiB
Python
Executable File
#!/usr/bin/env python2
|
|
# -*- coding: utf-8 -*-
|
|
|
|
# Copyright (c) 2021 Battelle Energy Alliance, LLC. All rights reserved.
|
|
|
|
from __future__ import print_function
|
|
|
|
import sys
|
|
import os
|
|
import re
|
|
import argparse
|
|
import struct
|
|
import ipaddress
|
|
import itertools
|
|
import json
|
|
import pprint
|
|
import uuid
|
|
from collections import defaultdict
|
|
|
|
# key under which mappings that do not name a required tag are grouped
UNSPECIFIED_TAG = '<~<~<none>~>~>'

# positions of the host and segment dictionaries inside each per-tag list
HOST_LIST_IDX, SEGMENT_LIST_IDX = 0, 1

# values accepted for the 'type' key of a JSON mapping entry
JSON_MAP_TYPE_SEGMENT, JSON_MAP_TYPE_HOST = 'segment', 'host'

# keys read from each JSON mapping entry
JSON_MAP_KEY_ADDR = 'address'
JSON_MAP_KEY_NAME = 'name'
JSON_MAP_KEY_TAG = 'tag'
JSON_MAP_KEY_TYPE = 'type'
|
|
|
|
###################################################################################################
|
|
# print to stderr
|
|
def eprint(*args, **kwargs):
    """Print to standard error.

    Positional and keyword arguments are forwarded to ``print`` untouched;
    only the destination stream is fixed to ``sys.stderr``.
    """
    errorStream = sys.stderr
    print(*args, file=errorStream, **kwargs)
|
|
|
|
###################################################################################################
|
|
# recursively convert unicode strings to utf-8 strings
|
|
def byteify(input):
    """Recursively convert unicode strings to the interpreter's native ``str``.

    Dictionaries (keys and values) and lists are walked recursively; any
    other value is returned unchanged.

    On Python 2, ``unicode`` objects are encoded to UTF-8 byte strings. On
    Python 3, ``str`` is already the native text type, so strings are
    returned as they are (encoding them would yield ``bytes``, which the
    rest of the script cannot format into its output).
    """
    if isinstance(input, dict):
        # .items() exists on both Python 2 and 3 (.iteritems() is 2-only)
        return {byteify(key): byteify(value) for key, value in input.items()}

    if isinstance(input, list):
        return [byteify(element) for element in input]

    # ``str is bytes`` is only true on Python 2, where type(u'') is ``unicode``
    if str is bytes and isinstance(input, type(u'')):
        return input.encode('utf-8')

    return input
|
|
|
|
###################################################################################################
|
|
# main
|
|
# matches MAC addresses written with ':' or '-' separators (or none at all)
# NOTE(review): the character class also accepts a literal '|' and the match is
# only anchored at the start of the string; kept unchanged to preserve behavior
_MAC_ADDR_REGEX = re.compile(r'([a-fA-F0-9]{2}[:|\-]?){6}')

# string types accepted for JSON values (str, plus unicode on Python 2)
_STRING_TYPES = (str, type(u''))


def _to_text(value):
    """Return value as a text (unicode) object, which is what ipaddress parses.

    On Python 2 a native ``str`` is a byte string and is decoded; on Python 3
    ``str`` is already text and is returned unchanged. A failed decode raises
    UnicodeDecodeError, which is a ValueError like ipaddress's own errors.
    """
    if isinstance(value, bytes):
        return value.decode('utf-8')
    return value


def _read_mapping_lines(paths):
    """Return the stripped, non-empty, non-comment lines of every existing file in paths."""
    lines = []
    for path in paths:
        # inputs that are not regular files are skipped silently
        if os.path.isfile(path):
            with open(path) as inFile:
                lines.extend(line.strip() for line in inFile)
    return [line for line in lines if line and not line.startswith('#')]


def _load_mixed_entries(paths):
    """Return the entries of every JSON mapping file in paths whose top level is a list."""
    entries = []
    for path in paths:
        # missing files are skipped silently, like the segment/host inputs
        if not os.path.isfile(path):
            continue
        try:
            with open(path, 'r') as inFile:
                loaded = json.load(inFile)
        except (IOError, OSError, ValueError) as err:
            # unreadable or malformed input is still ignored, but no longer invisibly
            eprint('Unable to load "{}" ({}), ignoring'.format(path, err))
            continue
        if isinstance(loaded, list):
            entries.extend(byteify(loaded))
    return entries


def _parse_segment_address(addr):
    """Return addr normalized as a CIDR network (if it contains '/') or an IP address, else None."""
    try:
        text = _to_text(addr)
        parsed = ipaddress.ip_network(text) if ('/' in addr) else ipaddress.ip_address(text)
    except ValueError:
        eprint('"{}" is not a valid IP address, ignoring'.format(addr))
        return None
    return str(parsed).lower()


def _parse_host_address(addr):
    """Return addr normalized as an IP address, or as a '_'-prefixed MAC address, else None."""
    try:
        # see if it's an IP address
        return str(ipaddress.ip_address(_to_text(addr))).lower()
    except ValueError:
        pass

    # see if it's a MAC address
    if _MAC_ADDR_REGEX.match(addr):
        # prepend _ temporarily to distinguish a mac address
        return "_{}".format(addr.replace('-', ':').lower())

    eprint('"{}" is not a valid IP or MAC address, ignoring'.format(addr))
    return None


def _parse_address_list(text, parseAddress):
    """Parse a comma-separated address list (whitespace ignored), dropping invalid items."""
    candidates = (parseAddress(item) for item in ''.join(text.split()).split(','))
    return [address for address in candidates if address is not None]


def _add_mapping_lines(tagListMap, lines, listIdx, parseAddress):
    """Add 'address(es)|name|required tag' lines to tagListMap[tag][listIdx][name].

    where:
      address(es): comma-separated list; for segments, CIDR-formatted networks
                   or IP addresses (eg., 10.0.0.0/8, 169.254.0.0/16, 172.16.10.41);
                   for hosts, IPv4, IPv6 or MAC addresses
                   (eg., 172.16.10.41, 02:42:45:dc:a2:96)
      name: segment or host name to be assigned when event address(es) match
      required tag (optional): only check the match and apply the name if the
                               event contains this tag
    """
    for line in lines:
        values = [x.strip() for x in line.split('|')]
        if len(values) < 2:
            eprint('"{}" is not formatted correctly, ignoring'.format(line))
            continue

        addresses = _parse_address_list(values[0], parseAddress)
        name = values[1]
        tagReq = values[2] if ((len(values) >= 3) and values[2]) else UNSPECIFIED_TAG
        if addresses and name:
            tagListMap[tagReq][listIdx][name].extend(addresses)
        else:
            eprint('"{}" is not formatted correctly, ignoring'.format(line))


def _is_valid_mixed_entry(entry):
    """Return True if entry has a known type and non-empty string name and address."""
    return bool(isinstance(entry, dict) and
                all(key in entry for key in (JSON_MAP_KEY_TYPE, JSON_MAP_KEY_NAME, JSON_MAP_KEY_ADDR)) and
                entry[JSON_MAP_KEY_TYPE] in (JSON_MAP_TYPE_SEGMENT, JSON_MAP_TYPE_HOST) and
                isinstance(entry[JSON_MAP_KEY_NAME], _STRING_TYPES) and entry[JSON_MAP_KEY_NAME] and
                isinstance(entry[JSON_MAP_KEY_ADDR], _STRING_TYPES) and entry[JSON_MAP_KEY_ADDR])


def _add_mixed_entries(tagListMap, mixedEntries):
    """Add entries loaded from the JSON-formatted mapping file(s) to tagListMap."""
    for entry in mixedEntries:
        # the entry must at least contain type, address, name; may optionally contain tag
        if not _is_valid_mixed_entry(entry):
            continue

        # a missing, empty or non-string tag means "no required tag"
        tag = entry.get(JSON_MAP_KEY_TAG)
        tagReq = tag if (isinstance(tag, _STRING_TYPES) and tag) else UNSPECIFIED_TAG

        if entry[JSON_MAP_KEY_TYPE] == JSON_MAP_TYPE_SEGMENT:
            # potentially interpret address as a CIDR-formatted subnet
            listIdx, parseAddress = SEGMENT_LIST_IDX, _parse_segment_address
        else:
            # should be an IP or MAC address
            listIdx, parseAddress = HOST_LIST_IDX, _parse_host_address

        # account for comma-separated multiple addresses per 'address' value
        addresses = _parse_address_list(entry[JSON_MAP_KEY_ADDR], parseAddress)
        if addresses:
            tagListMap[tagReq][listIdx][entry[JSON_MAP_KEY_NAME]].extend(addresses)


def _emit_hostname_filters(outFile, hostName, addresses, fieldSuffix, kind, filterId, addedFields):
    """Write the orig and resp hostname filters for addresses; return the updated filter id.

    fieldSuffix selects the zeek field compared against ('h' or 'l2_addr') and
    kind ('ip' or 'mac') is embedded in the generated filter id.
    """
    for source in ('orig', 'resp'):
        filterId += 1
        fieldName = "{}_{}".format(source, fieldSuffix)
        newFieldName = "{}_hostname".format(source)
        conditions = ' or '.join('([zeek][{}] == "{}")'.format(fieldName, address) for address in addresses)
        print("", file=outFile)
        print(' if ([zeek][{}]) and ({}) {{ '.format(fieldName, conditions), file=outFile)
        print(' mutate {{ id => "mutate_add_autogen_{}_{}_hostname_{}"'.format(source, kind, filterId), file=outFile)
        print(' add_field => {{ "[zeek][{}]" => "{}" }}'.format(newFieldName, hostName), file=outFile)
        print(" }", file=outFile)
        print(" }", file=outFile)
        addedFields.add("[zeek][{}]".format(newFieldName))
    return filterId


def _emit_segment_filters(outFile, segmentName, networks, filterId, addedFields):
    """Write the orig and resp cidr filters for networks; return the updated filter id."""
    networkList = ', '.join('"{}"'.format(network) for network in networks)
    for source in ('orig', 'resp'):
        filterId += 1
        # ip addresses/ranges mapped to network segment names
        fieldName = "{}_h".format(source)
        newFieldName = "{}_segment".format(source)
        print("", file=outFile)
        print(" if ([zeek][{}]) {{ cidr {{".format(fieldName), file=outFile)
        print(' id => "cidr_autogen_{}_segment_{}"'.format(source, filterId), file=outFile)
        print(' address => [ "%{{[zeek][{}]}}" ]'.format(fieldName), file=outFile)
        print(' network => [ {} ]'.format(networkList), file=outFile)
        print(' add_tag => [ "{}" ]'.format(segmentName), file=outFile)
        print(' add_field => {{ "[zeek][{}]" => "{}" }}'.format(newFieldName, segmentName), file=outFile)
        print(" } }", file=outFile)
        addedFields.add("[zeek][{}]".format(newFieldName))
    return filterId


def _emit_dedup_filters(outFile, addedFields):
    """Write a ruby filter that de-duplicates each hostname/segment field that was added."""
    if not addedFields:
        return

    print("", file=outFile)
    print(' # deduplicate any added fields', file=outFile)
    for source, kind in itertools.product(['orig', 'resp'], ['hostname', 'segment']):
        newFieldName = "[zeek][{}_{}]".format(source, kind)
        if newFieldName not in addedFields:
            continue
        # NOTE: groupby collapses every run of repeated characters, not only
        # underscores (so "zeek" becomes "zek" in the id); kept as-is so the
        # generated filter ids do not change
        idPart = ''.join(c for c, _ in itertools.groupby(re.sub('[^0-9a-zA-Z]+', '_', newFieldName)))
        print("", file=outFile)
        print(' if ({}) {{ '.format(newFieldName), file=outFile)
        print(' ruby {{ id => "ruby{}deduplicate"'.format(idPart), file=outFile)
        print(' code => "', file=outFile)
        print(" fieldVals = event.get('{}')".format(newFieldName), file=outFile)
        print(" if fieldVals.kind_of?(Array) then event.set('{}', fieldVals.uniq) end".format(newFieldName), file=outFile)
        print(' "', file=outFile)
        print(' } }', file=outFile)


def _write_filter(outFile, segmentLines, hostLines, mixedEntries):
    """Write the complete logstash filter block for the given mappings to outFile."""
    filterId = 0
    addedFields = set()

    try:
        print('filter {', file=outFile)
        print("", file=outFile)
        print(" # this file was automatically generated by {}".format(os.path.basename(__file__)), file=outFile)
        print("", file=outFile)

        # process mappings into a dictionary of two dictionaries of lists (one for hosts, one for segments)
        # eg., tagListMap[required tag name][HOST_LIST_IDX|SEGMENT_LIST_IDX][network segment name] = [172.16.0.0/12, 192.168.0.0/24, 10.0.0.41]
        tagListMap = defaultdict(lambda: [defaultdict(list), defaultdict(list)])
        _add_mapping_lines(tagListMap, segmentLines, SEGMENT_LIST_IDX, _parse_segment_address)
        _add_mapping_lines(tagListMap, hostLines, HOST_LIST_IDX, _parse_host_address)
        _add_mixed_entries(tagListMap, mixedEntries)

        # go through the lists of segments/hosts, which will now be organized by required tag first, then
        # segment/host name, then the list of addresses
        for tag, nameMaps in tagListMap.items():
            print("", file=outFile)

            # if a tag name is specified, print the IF statement verifying the tag's presence
            if tag != UNSPECIFIED_TAG:
                print(' if ("{}" in [tags]) {{'.format(tag), file=outFile)
            try:
                # for the host name(s) to be checked, create two filters, one for source IP|MAC and one for dest IP|MAC
                for hostName, addrList in nameMaps[HOST_LIST_IDX].items():
                    # addresses are de-duplicated and sorted so the generated file is deterministic

                    # ip addresses mapped to hostname
                    ipList = sorted(set(a for a in addrList if not a.startswith('_')))
                    if ipList:
                        filterId = _emit_hostname_filters(outFile, hostName, ipList, 'h', 'ip', filterId, addedFields)

                    # mac addresses mapped to hostname (the temporary _ prefix is dropped here)
                    macList = sorted(set(a[1:] for a in addrList if a.startswith('_')))
                    if macList:
                        filterId = _emit_hostname_filters(outFile, hostName, macList, 'l2_addr', 'mac', filterId, addedFields)

                # for the segment(s) to be checked, create two cidr filters, one for source IP and one for dest IP
                for segmentName, networks in nameMaps[SEGMENT_LIST_IDX].items():
                    filterId = _emit_segment_filters(outFile, segmentName, sorted(set(networks)), filterId, addedFields)

            finally:
                # if a tag name is specified, close the IF statement verifying the tag's presence
                if tag != UNSPECIFIED_TAG:
                    print("", file=outFile)
                    print(' }} # end (if "{}" in [tags])'.format(tag), file=outFile)

    finally:
        # the filter block is always closed, even if generation failed part way through
        _emit_dedup_filters(outFile, addedFields)

        # close out filter with ending }
        print("", file=outFile)
        print('} # end Filter', file=outFile)


def main():
    """Generate a logstash filter mapping event addresses to host and segment names.

    Command-line arguments:
      -m/--mixed    JSON mapping file(s): a list of objects with 'type'
                    ('segment' or 'host'), 'address', 'name' and optional 'tag'
      -s/--segment  segment mapping file(s), one 'IP(s)|segment name|required tag' per line
      -h/--host     host mapping file(s), one 'address|host name|required tag' per line
      -o/--output   output file ('-', the default, writes to stdout)

    Nothing is written when none of the inputs yields any mapping. Invalid
    command-line arguments print the usage text and exit with status 2.
    """
    # extract arguments from the command line (-h is used for --host, hence add_help=False)
    parser = argparse.ArgumentParser(description='Logstash IP address to Segment Filter Creator', add_help=False, usage='ip-to-segment-logstash.py <arguments>')
    parser.add_argument('-m', '--mixed', dest='mixedInput', metavar='<STR>', type=str, nargs='*', default='', help='Input mixed JSON mapping file(s)')
    parser.add_argument('-s', '--segment', dest='segmentInput', metavar='<STR>', type=str, nargs='*', default='', help='Input segment mapping file(s)')
    parser.add_argument('-h', '--host', dest='hostInput', metavar='<STR>', type=str, nargs='*', default='', help='Input host mapping file(s)')
    parser.add_argument('-o', '--output', dest='output', metavar='<STR>', type=str, default='-', help='Output file')
    try:
        parser.error = parser.exit
        args = parser.parse_args()
    except SystemExit:
        parser.print_help()
        sys.exit(2)

    # read each kind of input into its own list (comments and blank lines removed)
    segmentLines = _read_mapping_lines(args.segmentInput)
    hostLines = _read_mapping_lines(args.hostInput)
    mixedEntries = _load_mixed_entries(args.mixedInput)

    if not (segmentLines or hostLines or mixedEntries):
        return

    outFile = open(args.output, 'w+') if (args.output and args.output != '-') else sys.stdout
    try:
        _write_filter(outFile, segmentLines, hostLines, mixedEntries)
    finally:
        # never close stdout; always close a real output file, even on error
        if outFile is not sys.stdout:
            outFile.close()
|
|
|
|
# run the generator only when executed as a script, not when imported
if __name__ == "__main__":
    main()