added Malcolm
This commit is contained in:
312
Vagrant/resources/malcolm/logstash/scripts/ip-to-segment-logstash.py
Executable file
312
Vagrant/resources/malcolm/logstash/scripts/ip-to-segment-logstash.py
Executable file
@@ -0,0 +1,312 @@
|
||||
#!/usr/bin/env python2
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright (c) 2021 Battelle Energy Alliance, LLC. All rights reserved.
|
||||
|
||||
from __future__ import print_function
|
||||
|
||||
import sys
|
||||
import os
|
||||
import re
|
||||
import argparse
|
||||
import struct
|
||||
import ipaddress
|
||||
import itertools
|
||||
import json
|
||||
import pprint
|
||||
import uuid
|
||||
from collections import defaultdict
|
||||
|
||||
# Sentinel used to group mappings that did not specify a "required tag";
# chosen to be a string that will never appear as a real Logstash tag.
UNSPECIFIED_TAG = '<~<~<none>~>~>'
# Indices into the two-element list stored per tag in tagListMap (see main()):
# element 0 holds host-name mappings, element 1 holds segment mappings.
HOST_LIST_IDX = 0
SEGMENT_LIST_IDX = 1

# Keys and 'type' values recognized in entries of the mixed JSON mapping file.
JSON_MAP_TYPE_SEGMENT = 'segment'
JSON_MAP_TYPE_HOST = 'host'
JSON_MAP_KEY_ADDR = 'address'
JSON_MAP_KEY_NAME = 'name'
JSON_MAP_KEY_TAG = 'tag'
JSON_MAP_KEY_TYPE = 'type'
|
||||
|
||||
###################################################################################################
|
||||
# print to stderr
|
||||
def eprint(*args, **kwargs):
    """Write the given arguments to stderr (same signature as print())."""
    stream = sys.stderr
    print(*args, file=stream, **kwargs)
|
||||
|
||||
###################################################################################################
|
||||
# recursively convert unicode strings to utf-8 strings
|
||||
def byteify(input):
    """Recursively convert unicode strings to UTF-8 byte strings.

    Python 2 only: relies on the `unicode` builtin and dict.iteritems(),
    neither of which exists under Python 3.  Used to normalize the result
    of json.load() (which yields unicode strings in Python 2).
    NOTE(review): the parameter name shadows the `input` builtin; kept
    as-is to preserve the keyword-argument interface.
    """
    if isinstance(input, dict):
        # rebuild the dict, converting both keys and values
        return {byteify(key): byteify(value)
                for key, value in input.iteritems()}
    elif isinstance(input, list):
        return [byteify(element) for element in input]
    elif isinstance(input, unicode):
        return input.encode('utf-8')
    else:
        # anything else (str, int, bool, None, ...) passes through unchanged
        return input
|
||||
|
||||
###################################################################################################
|
||||
# main
|
||||
def main():
    """Generate a Logstash filter file mapping addresses to names.

    Reads three kinds of mapping inputs (CIDR->segment files, IP/MAC->host
    files, and a mixed JSON file), then emits a Logstash ``filter { ... }``
    configuration to --output (or stdout) that adds ``[zeek][*_hostname]``
    and ``[zeek][*_segment]`` fields to matching events.

    Fixes vs. the original:
      * two error paths formatted the undefined name ``ip`` instead of
        ``addr`` (NameError when an invalid host address was encountered)
      * input files are now closed via ``with`` instead of being leaked
      * the bare ``except:`` around JSON loading narrowed to ``Exception``
    """
    # extract arguments from the command line
    parser = argparse.ArgumentParser(description='Logstash IP address to Segment Filter Creator', add_help=False, usage='ip-to-segment-logstash.py <arguments>')
    parser.add_argument('-m', '--mixed', dest='mixedInput', metavar='<STR>', type=str, nargs='*', default='', help='Input mixed JSON mapping file(s)')
    parser.add_argument('-s', '--segment', dest='segmentInput', metavar='<STR>', type=str, nargs='*', default='', help='Input segment mapping file(s)')
    parser.add_argument('-h', '--host', dest='hostInput', metavar='<STR>', type=str, nargs='*', default='', help='Input host mapping file(s)')
    parser.add_argument('-o', '--output', dest='output', metavar='<STR>', type=str, default='-', help='Output file')
    try:
        parser.error = parser.exit
        args = parser.parse_args()
    except SystemExit:
        parser.print_help()
        exit(2)

    # read each input file into its own list
    segmentLines = []
    hostLines = []
    mixedEntries = []

    for inFile in args.segmentInput:
        if os.path.isfile(inFile):
            # 'with' guarantees the handle is closed (previously leaked)
            with open(inFile) as f:
                segmentLines.extend([line.strip() for line in f])

    for inFile in args.hostInput:
        if os.path.isfile(inFile):
            with open(inFile) as f:
                hostLines.extend([line.strip() for line in f])

    for inFile in args.mixedInput:
        try:
            with open(inFile, 'r') as f:
                tmpMixedEntries = json.load(f)
            if isinstance(tmpMixedEntries, list):
                mixedEntries.extend(byteify(tmpMixedEntries))
        except Exception:
            # best-effort by design: a missing/unreadable/malformed JSON
            # mapping file is silently skipped (was a bare except)
            pass

    # remove comments and blank lines
    segmentLines = list(filter(lambda x: (len(x) > 0) and (not x.startswith('#')), segmentLines))
    hostLines = list(filter(lambda x: (len(x) > 0) and (not x.startswith('#')), hostLines))

    if (len(segmentLines) > 0) or (len(hostLines) > 0) or (len(mixedEntries) > 0):

        filterId = 0          # monotonically increasing suffix for unique filter ids
        addedFields = set()   # every "[zeek][...]" field any emitted filter may add

        outFile = open(args.output, 'w+') if (args.output and args.output != '-') else sys.stdout
        try:
            print('filter {', file=outFile)
            print("", file=outFile)
            print(" # this file was automatically generated by {}".format(os.path.basename(__file__)), file=outFile)
            print("", file=outFile)

            # process mappings into a dictionary of two dictionaries of lists (hosts, segments), eg.
            # tagListMap[required tag][HOST_LIST_IDX|SEGMENT_LIST_IDX][name] = [172.16.0.0/12, 10.0.0.41]
            tagListMap = defaultdict(lambda: [defaultdict(list), defaultdict(list)])

            # handle segment mappings, format: "IP(s)|segment name|required tag"
            #   IP(s): comma-separated CIDR subnets and/or plain IP addresses
            #   required tag (optional): only apply segment name if the event has this tag
            for line in segmentLines:
                values = [x.strip() for x in line.split('|')]
                if len(values) >= 2:
                    networkList = []
                    for ip in ''.join(values[0].split()).split(','):
                        try:
                            networkList.append(str(ipaddress.ip_network(unicode(ip))).lower() if ('/' in ip) else str(ipaddress.ip_address(unicode(ip))).lower())
                        except ValueError:
                            eprint('"{}" is not a valid IP address, ignoring'.format(ip))
                    segmentName = values[1]
                    tagReq = values[2] if ((len(values) >= 3) and (len(values[2]) > 0)) else UNSPECIFIED_TAG
                    if (len(networkList) > 0) and (len(segmentName) > 0):
                        tagListMap[tagReq][SEGMENT_LIST_IDX][segmentName].extend(networkList)
                    else:
                        eprint('"{}" is not formatted correctly, ignoring'.format(line))
                else:
                    eprint('"{}" is not formatted correctly, ignoring'.format(line))

            # handle hostname mappings, format: "address|host name|required tag"
            #   address: comma-separated IPv4, IPv6, or MAC addresses
            macAddrRegex = re.compile(r'([a-fA-F0-9]{2}[:|\-]?){6}')
            for line in hostLines:
                values = [x.strip() for x in line.split('|')]
                if len(values) >= 2:
                    addressList = []
                    for addr in ''.join(values[0].split()).split(','):
                        try:
                            # see if it's an IP address
                            addressList.append(str(ipaddress.ip_address(unicode(addr))).lower())
                        except ValueError:
                            # see if it's a MAC address
                            if re.match(macAddrRegex, addr):
                                # prepend _ temporarily to distinguish a mac address
                                addressList.append("_{}".format(addr.replace('-', ':').lower()))
                            else:
                                # BUG FIX: previously formatted undefined 'ip'
                                eprint('"{}" is not a valid IP or MAC address, ignoring'.format(addr))
                    hostName = values[1]
                    tagReq = values[2] if ((len(values) >= 3) and (len(values[2]) > 0)) else UNSPECIFIED_TAG
                    if (len(addressList) > 0) and (len(hostName) > 0):
                        tagListMap[tagReq][HOST_LIST_IDX][hostName].extend(addressList)
                    else:
                        eprint('"{}" is not formatted correctly, ignoring'.format(line))
                else:
                    eprint('"{}" is not formatted correctly, ignoring'.format(line))

            # handle mixed entries from the JSON-formatted file; each entry must
            # at least contain type, address, name, and may optionally contain tag
            for entry in mixedEntries:
                if (isinstance(entry, dict) and
                    all(key in entry for key in (JSON_MAP_KEY_TYPE, JSON_MAP_KEY_NAME, JSON_MAP_KEY_ADDR)) and
                    entry[JSON_MAP_KEY_TYPE] in (JSON_MAP_TYPE_SEGMENT, JSON_MAP_TYPE_HOST) and
                    (len(entry[JSON_MAP_KEY_NAME]) > 0) and
                    (len(entry[JSON_MAP_KEY_ADDR]) > 0)):

                    addressList = []
                    networkList = []

                    tagReq = entry[JSON_MAP_KEY_TAG] if (JSON_MAP_KEY_TAG in entry) and (len(entry[JSON_MAP_KEY_TAG]) > 0) else UNSPECIFIED_TAG

                    # account for comma-separated multiple addresses per 'address' value
                    for addr in ''.join(entry[JSON_MAP_KEY_ADDR].split()).split(','):

                        if (entry[JSON_MAP_KEY_TYPE] == JSON_MAP_TYPE_SEGMENT):
                            # potentially interpret address as a CIDR-formatted subnet
                            try:
                                networkList.append(str(ipaddress.ip_network(unicode(addr))).lower() if ('/' in addr) else str(ipaddress.ip_address(unicode(addr))).lower())
                            except ValueError:
                                eprint('"{}" is not a valid IP address, ignoring'.format(addr))

                        else:
                            # should be an IP or MAC address
                            try:
                                # see if it's an IP address
                                addressList.append(str(ipaddress.ip_address(unicode(addr))).lower())
                            except ValueError:
                                # see if it's a MAC address
                                if re.match(macAddrRegex, addr):
                                    # prepend _ temporarily to distinguish a mac address
                                    addressList.append("_{}".format(addr.replace('-', ':').lower()))
                                else:
                                    # BUG FIX: previously formatted undefined 'ip'
                                    eprint('"{}" is not a valid IP or MAC address, ignoring'.format(addr))

                    if (len(networkList) > 0):
                        tagListMap[tagReq][SEGMENT_LIST_IDX][entry[JSON_MAP_KEY_NAME]].extend(networkList)

                    if (len(addressList) > 0):
                        tagListMap[tagReq][HOST_LIST_IDX][entry[JSON_MAP_KEY_NAME]].extend(addressList)

            # go through the lists of segments/hosts, organized by required tag first,
            # then segment/host name, then the list of addresses
            for tag, nameMaps in tagListMap.iteritems():
                print("", file=outFile)

                # if a tag name is specified, print the IF statement verifying the tag's presence
                if tag != UNSPECIFIED_TAG:
                    print(' if ("{}" in [tags]) {{'.format(tag), file=outFile)
                try:

                    # for the host names(s) to be checked, create two filters,
                    # one for source IP|MAC and one for dest IP|MAC
                    for hostName, addrList in nameMaps[HOST_LIST_IDX].iteritems():

                        # ip addresses mapped to hostname
                        ipList = list(set([a for a in addrList if not a.startswith('_')]))
                        if (len(ipList) >= 1):
                            for source in ['orig', 'resp']:
                                filterId += 1
                                fieldName = "{}_h".format(source)
                                newFieldName = "{}_hostname".format(source)
                                print("", file=outFile)
                                print(' if ([zeek][{}]) and ({}) {{ '.format(fieldName, ' or '.join(['([zeek][{}] == "{}")'.format(fieldName, ip) for ip in ipList])), file=outFile)
                                print(' mutate {{ id => "mutate_add_autogen_{}_ip_hostname_{}"'.format(source, filterId), file=outFile)
                                print(' add_field => {{ "[zeek][{}]" => "{}" }}'.format(newFieldName, hostName), file=outFile)
                                print(" }", file=outFile)
                                print(" }", file=outFile)
                                addedFields.add("[zeek][{}]".format(newFieldName))

                        # mac addresses mapped to hostname (strip the temporary '_' marker)
                        macList = list(set([a for a in addrList if a.startswith('_')]))
                        if (len(macList) >= 1):
                            for source in ['orig', 'resp']:
                                filterId += 1
                                fieldName = "{}_l2_addr".format(source)
                                newFieldName = "{}_hostname".format(source)
                                print("", file=outFile)
                                print(' if ([zeek][{}]) and ({}) {{ '.format(fieldName, ' or '.join(['([zeek][{}] == "{}")'.format(fieldName, mac[1:]) for mac in macList])), file=outFile)
                                print(' mutate {{ id => "mutate_add_autogen_{}_mac_hostname_{}"'.format(source, filterId), file=outFile)
                                print(' add_field => {{ "[zeek][{}]" => "{}" }}'.format(newFieldName, hostName), file=outFile)
                                print(" }", file=outFile)
                                print(" }", file=outFile)
                                addedFields.add("[zeek][{}]".format(newFieldName))

                    # for the segment(s) to be checked, create two cidr filters,
                    # one for source IP and one for dest IP
                    for segmentName, ipList in nameMaps[SEGMENT_LIST_IDX].iteritems():
                        ipList = list(set(ipList))
                        for source in ['orig', 'resp']:
                            filterId += 1
                            # ip addresses/ranges mapped to network segment names
                            fieldName = "{}_h".format(source)
                            newFieldName = "{}_segment".format(source)
                            print("", file=outFile)
                            print(" if ([zeek][{}]) {{ cidr {{".format(fieldName), file=outFile)
                            print(' id => "cidr_autogen_{}_segment_{}"'.format(source, filterId), file=outFile)
                            print(' address => [ "%{{[zeek][{}]}}" ]'.format(fieldName), file=outFile)
                            print(' network => [ {} ]'.format(', '.join('"{}"'.format(ip) for ip in ipList)), file=outFile)
                            print(' add_tag => [ "{}" ]'.format(segmentName), file=outFile)
                            print(' add_field => {{ "[zeek][{}]" => "{}" }}'.format(newFieldName, segmentName), file=outFile)
                            print(" } }", file=outFile)
                            addedFields.add("[zeek][{}]".format(newFieldName))

                finally:
                    # if a tag name is specified, close the IF statement verifying the tag's presence
                    if tag != UNSPECIFIED_TAG:
                        print("", file=outFile)
                        print(' }} # end (if "{}" in [tags])'.format(tag), file=outFile)

        finally:
            # deduplicate any added fields (an event could match several mappings)
            if addedFields:
                print("", file=outFile)
                print(' # deduplicate any added fields', file=outFile)
                for field in list(itertools.product(['orig', 'resp'], ['hostname', 'segment'])):
                    newFieldName = "[zeek][{}_{}]".format(field[0], field[1])
                    if newFieldName in addedFields:
                        print("", file=outFile)
                        print(' if ({}) {{ '.format(newFieldName), file=outFile)
                        print(' ruby {{ id => "ruby{}deduplicate"'.format(''.join(c for c, _ in itertools.groupby(re.sub('[^0-9a-zA-Z]+', '_', newFieldName)))), file=outFile)
                        print(' code => "', file=outFile)
                        print(" fieldVals = event.get('{}')".format(newFieldName), file=outFile)
                        print(" if fieldVals.kind_of?(Array) then event.set('{}', fieldVals.uniq) end".format(newFieldName), file=outFile)
                        print(' "', file=outFile)
                        print(' } }', file=outFile)

            # close out filter with ending }
            print("", file=outFile)
            print('} # end Filter', file=outFile)

            if outFile is not sys.stdout:
                outFile.close()
|
||||
156
Vagrant/resources/malcolm/logstash/scripts/ja3_build_list.py
Executable file
156
Vagrant/resources/malcolm/logstash/scripts/ja3_build_list.py
Executable file
@@ -0,0 +1,156 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import print_function
|
||||
|
||||
import argparse
|
||||
import datetime
|
||||
import hashlib
|
||||
import json
|
||||
import os
|
||||
import platform
|
||||
import pprint
|
||||
import re
|
||||
import requests
|
||||
import string
|
||||
import sys
|
||||
import yaml
|
||||
from collections import defaultdict
|
||||
|
||||
###################################################################################################
|
||||
# verbose-output flag; overwritten from the -v/--verbose argument in main()
debug = False
# True when running under Python 3.x (selects md5/yaml code paths below)
PY3 = (sys.version_info.major >= 3)
# name and directory of this script, plus the working directory at startup
scriptName = os.path.basename(__file__)
scriptPath = os.path.dirname(os.path.realpath(__file__))
origPath = os.getcwd()

###################################################################################################
# Python 2 compatibility shims
if not PY3:
    # make input() behave like Python 3's input (i.e., Python 2's raw_input)
    if hasattr(__builtins__, 'raw_input'): input = raw_input

try:
    FileNotFoundError
except NameError:
    # Python 2 has no FileNotFoundError; IOError is the closest equivalent
    FileNotFoundError = IOError
|
||||
|
||||
###################################################################################################
|
||||
# print to stderr
|
||||
def eprint(*args, **kwargs):
    """Mirror of print() that targets stderr rather than stdout."""
    err = sys.stderr
    print(*args, file=err, **kwargs)
|
||||
|
||||
###################################################################################################
|
||||
# convenient boolean argument parsing
|
||||
def str2bool(v):
    """Convert a string to a bool for use as an argparse ``type=``.

    Accepts the usual yes/no spellings (case-insensitive); raises
    argparse.ArgumentTypeError for anything unrecognized.
    """
    lowered = v.lower()
    if lowered in ('yes', 'true', 't', 'y', '1'):
        return True
    if lowered in ('no', 'false', 'f', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError('Boolean value expected.')
|
||||
|
||||
###################################################################################################
|
||||
# main
|
||||
def main():
    """Build a YAML map of JA3 fingerprint MD5 hashes to client descriptions.

    Downloads several public JA3 fingerprint lists, merges them into a single
    ``hash -> [descriptions]`` map, and writes the result as YAML to the file
    given by --output.  Each source is fetched best-effort: failures are
    reported to stderr and the remaining sources are still processed.
    """
    global debug

    # parse command-line arguments
    parser = argparse.ArgumentParser(description=scriptName, add_help=False, usage='{} <arguments>'.format(scriptName))
    parser.add_argument('-v', '--verbose', dest='debug', type=str2bool, nargs='?', const=True, default=False, help="Verbose output")
    parser.add_argument('-o', '--output', required=True, dest='output', metavar='<STR>', type=str, default='', help='Output file')
    try:
        parser.error = parser.exit
        args = parser.parse_args()
    except SystemExit:
        parser.print_help()
        exit(2)

    debug = args.debug
    if debug:
        eprint(os.path.join(scriptPath, scriptName))
        eprint("Arguments: {}".format(sys.argv[1:]))
        eprint("Arguments: {}".format(args))
    else:
        # suppress tracebacks on uncaught exceptions when not verbose
        sys.tracebacklimit = 0

    # ja3Map: JA3 MD5 hash -> list of user-agent/client descriptions
    ja3Map = defaultdict(list)
    # kept at function scope so the except handlers can report the offending entry
    fingerprint = None

    # source 1: ja3er.com — JSON list of {md5, User-Agent, ...} objects
    urls = ['https://ja3er.com/getAllUasJson']
    for url in urls:
        try:
            for fingerprint in requests.get(url).json():
                if ('md5' in fingerprint) and fingerprint['md5'] and ('User-Agent' in fingerprint) and fingerprint['User-Agent']:
                    ja3Map[fingerprint['md5']].append(fingerprint['User-Agent'].strip('"').strip("'"))
        except Exception as e:
            eprint('"{}" raised for "{}"'.format(str(e), fingerprint))

    # source 2: LeeBrotherston tls-fingerprinting — one JSON object per line;
    # the JA3 MD5 is computed here from the listed TLS parameter fields
    try:
        url = 'https://raw.githubusercontent.com/LeeBrotherston/tls-fingerprinting/master/fingerprints/fingerprints.json'
        keys = ['record_tls_version', 'ciphersuite', 'extensions', 'e_curves', 'ec_point_fmt']
        for fingerprint in [x for x in requests.get(url).text.splitlines() if (len(x) > 0) and (not x.startswith('#'))]:
            try:
                values = list()
                tmpMap = defaultdict(str)
                tmpMap.update(json.loads(fingerprint))
                for key in keys:
                    # each field is a whitespace-separated list of numbers
                    # (int(x, 0) accepts hex or decimal); join as dashed decimals
                    values.append('-'.join([str(int(x, 0)) for x in tmpMap[key].split()]))
                if PY3:
                    # hashlib requires bytes under Python 3
                    ja3Map[hashlib.md5(','.join(values).encode()).hexdigest()].extend(tmpMap['desc'].strip('"').strip("'").split(' / '))
                else:
                    ja3Map[hashlib.md5(','.join(values)).hexdigest()].extend(tmpMap['desc'].strip('"').strip("'").split(' / '))
            except Exception as e:
                eprint('"{}" raised for "{}"'.format(str(e), fingerprint))
    except Exception as e:
        eprint('"{}" raised for "{}"'.format(str(e), fingerprint))

    # source 3: trisulnsm ja3prints — one JSON object per line with ja3_hash/desc
    urls = ['https://raw.githubusercontent.com/trisulnsm/ja3prints/master/ja3fingerprint.json']
    for url in urls:
        try:
            for fingerprint in [x for x in requests.get(url).text.splitlines() if (len(x) > 0) and (not x.startswith('#'))]:
                try:
                    values = list()
                    tmpMap = defaultdict(str)
                    tmpMap.update(json.loads(fingerprint))
                    ja3Map[tmpMap['ja3_hash'].strip()].append(tmpMap['desc'].strip('"').strip("'"))
                except Exception as e:
                    eprint('"{}" raised for "{}"'.format(str(e), fingerprint))
        except Exception as e:
            eprint('"{}" raised for "{}"'.format(str(e), fingerprint))

    # this one has desc and ja3_hash backwards from the previous one
    # (non-MD5 keys produced here are discarded by the hexdigits check below)
    urls = ['https://raw.githubusercontent.com/trisulnsm/ja3prints/master/newprints.json']
    for url in urls:
        try:
            for fingerprint in [x for x in requests.get(url).text.splitlines() if (len(x) > 0) and (not x.startswith('#'))]:
                try:
                    values = list()
                    tmpMap = defaultdict(str)
                    tmpMap.update(json.loads(fingerprint))
                    ja3Map[tmpMap['desc'].strip()].append(tmpMap['ja3_hash'].strip('"').strip("'"))
                except Exception as e:
                    eprint('"{}" raised for "{}"'.format(str(e), fingerprint))
        except Exception as e:
            eprint('"{}" raised for "{}"'.format(str(e), fingerprint))

    # this one is csv (and overlaps the previous one a lot)
    try:
        url = 'https://raw.githubusercontent.com/salesforce/ja3/master/lists/osx-nix-ja3.csv'
        for fingerprint in [x for x in requests.get(url).text.splitlines() if (len(x) > 0) and (not x.startswith('#'))]:
            # split into "hash,description" on the first comma only
            vals = ' '.join(fingerprint.split()).split(',', 1)
            if (len(vals) == 2) and (len(vals[0]) == 32):
                ja3Map[vals[0].strip()].append(vals[1].strip('"').strip("'"))
    except Exception as e:
        eprint('"{}" raised for "{}"'.format(str(e), fingerprint))

    # keep only keys that look like real MD5 hashes (32 hex chars);
    # deduplicate and strip quotes/whitespace from the descriptions
    finalMap = dict()
    for k, v in ja3Map.items():
        if (len(k) == 32) and all(c in string.hexdigits for c in k):
            finalMap[k] = list(set([element.strip('"').strip("'").strip() for element in v]))

    # write the merged map as YAML
    with open(args.output, 'w+') as outfile:
        if PY3:
            yaml.dump(finalMap, outfile)
        else:
            yaml.safe_dump(finalMap, outfile, default_flow_style=False)
|
||||
89
Vagrant/resources/malcolm/logstash/scripts/logstash-start.sh
Executable file
89
Vagrant/resources/malcolm/logstash/scripts/logstash-start.sh
Executable file
@@ -0,0 +1,89 @@
|
||||
#!/usr/bin/env bash

# Copyright (c) 2021 Battelle Energy Alliance, LLC. All rights reserved.

# Container entrypoint: assemble the Malcolm Logstash pipeline configuration
# (pipelines.yml plus generated host/segment filters) and then exec logstash.
set -e

# if any pipelines are volume-mounted inside this docker container, they should belong to subdirectories under this path
HOST_PIPELINES_DIR="/usr/share/logstash/malcolm-pipelines.available"

# runtime pipelines parent directory
export PIPELINES_DIR="/usr/share/logstash/malcolm-pipelines"

# runtime pipelines configuration file
export PIPELINES_CFG="/usr/share/logstash/config/pipelines.yml"

# for each pipeline in /usr/share/logstash/malcolm-pipelines, append the contents of this file to the dynamically-generated
# pipeline section in pipelines.yml (then delete 00_config.conf before starting)
export PIPELINE_EXTRA_CONF_FILE="00_config.conf"

# files defining IP->host and MAC->host mapping
INPUT_CIDR_MAP="/usr/share/logstash/config/cidr-map.txt"
INPUT_HOST_MAP="/usr/share/logstash/config/host-map.txt"
INPUT_MIXED_MAP="/usr/share/logstash/config/net-map.json"

# the name of the enrichment pipeline subdirectory under $PIPELINES_DIR
ENRICHMENT_PIPELINE=${LOGSTASH_ENRICHMENT_PIPELINE:-"enrichment"}

# the name of the pipeline(s) to which input will send logs for parsing (comma-separated list, no quotes)
PARSE_PIPELINE_ADDRESSES=${LOGSTASH_PARSE_PIPELINE_ADDRESSES:-"zeek-parse"}

# pipeline addresses for forwarding from Logstash to Elasticsearch (both "internal" and "external" pipelines)
export ELASTICSEARCH_PIPELINE_ADDRESS_INTERNAL=${LOGSTASH_ELASTICSEARCH_PIPELINE_ADDRESS_INTERNAL:-"internal-es"}
export ELASTICSEARCH_PIPELINE_ADDRESS_EXTERNAL=${LOGSTASH_ELASTICSEARCH_PIPELINE_ADDRESS_EXTERNAL:-"external-es"}
ELASTICSEARCH_OUTPUT_PIPELINE_ADDRESSES=${LOGSTASH_ELASTICSEARCH_OUTPUT_PIPELINE_ADDRESSES:-"$ELASTICSEARCH_PIPELINE_ADDRESS_INTERNAL,$ELASTICSEARCH_PIPELINE_ADDRESS_EXTERNAL"}

# ip-to-segment-logstash.py translates $INPUT_CIDR_MAP, $INPUT_HOST_MAP, $INPUT_MIXED_MAP into this logstash filter file
NETWORK_MAP_OUTPUT_FILTER="$PIPELINES_DIR"/"$ENRICHMENT_PIPELINE"/16_host_segment_filters.conf

####################################################################################################################

# copy over pipeline filters from host-mapped volumes (if any) into their final resting places
find "$HOST_PIPELINES_DIR" -mindepth 1 -maxdepth 1 -type d -print0 2>/dev/null | sort -z | \
  xargs -0 -n 1 -I '{}' bash -c '
    PIPELINE_NAME="$(basename "{}")"
    PIPELINES_DEST_DIR="$PIPELINES_DIR"/"$PIPELINE_NAME"
    mkdir -p "$PIPELINES_DEST_DIR"
    cp -f "{}"/* "$PIPELINES_DEST_DIR"/
  '

# dynamically generate final pipelines.yml configuration file from all of the pipeline directories;
# a pipeline whose input address is the "external" ES address is skipped unless ES_EXTERNAL_HOSTS is set
> "$PIPELINES_CFG"
find "$PIPELINES_DIR" -mindepth 1 -maxdepth 1 -type d -print0 2>/dev/null | sort -z | \
  xargs -0 -n 1 -I '{}' bash -c '
    PIPELINE_NAME="$(basename "{}")"
    PIPELINE_ADDRESS_NAME="$(cat "{}"/*.conf | sed -e "s/:[\}]*.*\(}\)/\1/" | envsubst | grep -P "\baddress\s*=>" | awk "{print \$3}" | sed "s/[\"'']//g" | head -n 1)"
    if [[ -n "$ES_EXTERNAL_HOSTS" ]] || [[ "$PIPELINE_ADDRESS_NAME" != "$ELASTICSEARCH_PIPELINE_ADDRESS_EXTERNAL" ]]; then
      echo "- pipeline.id: malcolm-$PIPELINE_NAME" >> "$PIPELINES_CFG"
      echo "  path.config: "{}"" >> "$PIPELINES_CFG"
      cat "{}"/"$PIPELINE_EXTRA_CONF_FILE" 2>/dev/null >> "$PIPELINES_CFG"
      rm -f "{}"/"$PIPELINE_EXTRA_CONF_FILE"
      echo >> "$PIPELINES_CFG"
      echo >> "$PIPELINES_CFG"
    fi
  '

# create filters for network segment and host mapping in the enrichment directory
rm -f "$NETWORK_MAP_OUTPUT_FILTER"
/usr/local/bin/ip-to-segment-logstash.py --mixed "$INPUT_MIXED_MAP" --segment "$INPUT_CIDR_MAP" --host "$INPUT_HOST_MAP" -o "$NETWORK_MAP_OUTPUT_FILTER"

if [[ -z "$ES_EXTERNAL_HOSTS" ]]; then
  # external ES host destination is not specified, remove external destination from enrichment pipeline output
  ELASTICSEARCH_OUTPUT_PIPELINE_ADDRESSES="$(echo "$ELASTICSEARCH_OUTPUT_PIPELINE_ADDRESSES" | sed "s/,[[:blank:]]*$ELASTICSEARCH_PIPELINE_ADDRESS_EXTERNAL//")"
fi

# insert quotes around the elasticsearch parsing and output pipeline list
MALCOLM_PARSE_PIPELINE_ADDRESSES=$(printf '"%s"\n' "${PARSE_PIPELINE_ADDRESSES//,/\",\"}")
MALCOLM_ELASTICSEARCH_OUTPUT_PIPELINES=$(printf '"%s"\n' "${ELASTICSEARCH_OUTPUT_PIPELINE_ADDRESSES//,/\",\"}")

# do a manual global replace on these particular values in the config files, as Logstash doesn't like the environment variables with quotes in them
find "$PIPELINES_DIR" -type f -name "*.conf" -exec sed -i "s/_MALCOLM_ELASTICSEARCH_OUTPUT_PIPELINES_/${MALCOLM_ELASTICSEARCH_OUTPUT_PIPELINES}/g" "{}" \; 2>/dev/null
find "$PIPELINES_DIR" -type f -name "*.conf" -exec sed -i "s/_MALCOLM_PARSE_PIPELINE_ADDRESSES_/${MALCOLM_PARSE_PIPELINE_ADDRESSES}/g" "{}" \; 2>/dev/null

# import trusted CA certificates if necessary
/usr/local/bin/jdk-cacerts-auto-import.sh || true

# start logstash (adapted from docker-entrypoint)
env2yaml /usr/share/logstash/config/logstash.yml
export LS_JAVA_OPTS="-Dls.cgroup.cpuacct.path.override=/ -Dls.cgroup.cpu.path.override=/ $LS_JAVA_OPTS"
exec logstash
|
||||
Reference in New Issue
Block a user