added Malcolm

This commit is contained in:
2021-08-06 10:35:01 +02:00
parent f043730066
commit 70f1922e80
751 changed files with 195277 additions and 0 deletions

View File

@@ -0,0 +1,291 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import argparse
import json
import re
import requests
import os
import sys
# Kibana API paths; main() joins each one onto the Kibana base URL (-k/--kibana).
# Status document: main() reads ['version']['number'] from it for the kbn-version header.
GET_STATUS_API = 'api/status'
# Saved-object search: used to look up the ID of the index pattern named by -i/--index.
GET_INDEX_PATTERN_INFO_URI = 'api/saved_objects/_find'
# Field listing: main() reads the 'fields' list of the response for the index pattern.
GET_FIELDS_URI = 'api/index_patterns/_fields_for_wildcard'
# Target of the PUT that writes the refreshed index pattern (the index ID is appended).
PUT_INDEX_PATTERN_URI = 'api/saved_objects/index-pattern'
# Elasticsearch path (joined onto -e/--elastic) from which a named template is fetched.
ES_GET_TEMPLATE_URI = '_template'
###################################################################################################
# Verbose-output flag; overwritten by main() from the -v/--verbose argument.
debug = False
# File name of this script; used for the argparse description/usage and debug output.
scriptName = os.path.basename(__file__)
# Directory containing this script, with symlinks resolved.
scriptPath = os.path.dirname(os.path.realpath(__file__))
# Working directory at import time (not referenced elsewhere in the visible code).
origPath = os.getcwd()
###################################################################################################
# print to stderr
def eprint(*args, **kwargs):
    """Print to standard error.

    Accepts the same positional and keyword arguments as the built-in
    ``print``, except ``file``, which is always ``sys.stderr`` (looked up at
    call time).
    """
    errStream = sys.stderr
    print(*args, file=errStream, **kwargs)
###################################################################################################
# convenient boolean argument parsing
def str2bool(v):
    """Convert a command-line token to a bool (for use as an argparse ``type``).

    The comparison is case-insensitive.

    Args:
        v: the string to interpret.

    Returns:
        True for 'yes', 'true', 't', 'y', '1';
        False for 'no', 'false', 'f', 'n', '0'.

    Raises:
        argparse.ArgumentTypeError: if the token is none of the above.
    """
    truthy = ('yes', 'true', 't', 'y', '1')
    falsy = ('no', 'false', 'f', 'n', '0')
    token = v.lower()
    if token in truthy:
        return True
    if token in falsy:
        return False
    raise argparse.ArgumentTypeError('Boolean value expected.')
###################################################################################################
# main
def main():
    """Refresh a Kibana index pattern's field list and drilldown field formats.

    Steps, as driven by the command-line arguments:

    1. Query Kibana's status API for its version (sent back as the
       ``kbn-version`` header on the final PUT).
    2. Query Elasticsearch's root URL for its version (debug output only).
    3. Look up the saved-object ID of the index pattern named by ``--index``.
    4. Fetch the pattern's current field list from Kibana and, if
       ``--template`` is given, merge in simple-typed fields from that
       Elasticsearch template that Kibana does not know about yet.
    5. Build a ``fieldFormatMap`` of "drilldown" URL templates per field.
    6. PUT the field list and format map back to Kibana (skipped on
       ``--dry-run``).

    Prints "success..." or "failure..." to stdout. HTTP errors from the
    Kibana requests propagate as exceptions raised by ``raise_for_status()``.
    Exits with status 2 (after printing usage) on argument errors.
    """
    global debug

    parser = argparse.ArgumentParser(description=scriptName, add_help=False, usage='{} <arguments>'.format(scriptName))
    parser.add_argument('-v', '--verbose', dest='debug', type=str2bool, nargs='?', const=True, default=False, help="Verbose output")
    parser.add_argument('-i', '--index', dest='index', metavar='<str>', type=str, default='sessions2-*', help='Index Pattern Name')
    parser.add_argument('-k', '--kibana', dest='kibanaUrl', metavar='<protocol://host:port>', type=str, default=os.getenv('KIBANA_URL', 'http://kibana:5601/kibana'), help='Kibana URL')
    parser.add_argument('-e', '--elastic', dest='elasticUrl', metavar='<protocol://host:port>', type=str, default=os.getenv('ELASTICSEARCH_URL', 'http://elasticsearch:9200'), help='Elasticsearch URL')
    parser.add_argument('-t', '--template', dest='template', metavar='<str>', type=str, default=None, help='Elasticsearch template to merge')
    parser.add_argument('-n', '--dry-run', dest='dryrun', type=str2bool, nargs='?', const=True, default=False, help="Dry run (no PUT)")
    try:
        # route argparse errors through SystemExit so full help is shown instead of the terse usage line
        parser.error = parser.exit
        args = parser.parse_args()
    except SystemExit:
        parser.print_help()
        # sys.exit rather than the site-provided exit(), which is not guaranteed to exist
        sys.exit(2)

    debug = args.debug
    if debug:
        eprint(os.path.join(scriptPath, scriptName))
        eprint("Arguments: {}".format(sys.argv[1:]))
        eprint("Arguments: {}".format(args))
    else:
        # hide tracebacks from non-verbose users
        sys.tracebacklimit = 0

    # get version number so kibana doesn't think we're doing a XSRF when we do the PUT
    statusInfoResponse = requests.get('{}/{}'.format(args.kibanaUrl, GET_STATUS_API))
    statusInfoResponse.raise_for_status()
    statusInfo = statusInfoResponse.json()
    kibanaVersion = statusInfo['version']['number']
    if debug:
        eprint('Kibana version is {}'.format(kibanaVersion))

    # Read the Elasticsearch version from the Elasticsearch response itself (it was previously
    # taken from the Kibana status document by mistake). The value is informational only, so
    # an unexpected reply is reported rather than treated as fatal.
    esInfoResponse = requests.get(args.elasticUrl)
    try:
        esInfoResponse.raise_for_status()
        esInfo = esInfoResponse.json()
        elasticVersion = esInfo['version']['number']
    except (requests.exceptions.RequestException, ValueError, KeyError, TypeError) as e:
        elasticVersion = None
        if debug:
            eprint('Could not determine Elasticsearch version: {}'.format(e))
    if debug and (elasticVersion is not None):
        eprint('Elasticsearch version is {}'.format(elasticVersion))

    # find the ID of the index name (probably will be the same as the name)
    getIndexInfoResponse = requests.get(
        '{}/{}'.format(args.kibanaUrl, GET_INDEX_PATTERN_INFO_URI),
        params={
            'type': 'index-pattern',
            'fields': 'id',
            'search': '"{}"'.format(args.index)
        }
    )
    getIndexInfoResponse.raise_for_status()
    getIndexInfo = getIndexInfoResponse.json()
    indexId = getIndexInfo['saved_objects'][0]['id'] if (len(getIndexInfo['saved_objects']) > 0) else None
    if debug:
        eprint('Index ID for {} is {}'.format(args.index, indexId))

    if indexId is not None:

        # get the current fields list
        getFieldsResponse = requests.get('{}/{}'.format(args.kibanaUrl, GET_FIELDS_URI),
                                         params={'pattern': args.index,
                                                 'meta_fields': ["_source", "_id", "_type", "_index", "_score"]})
        getFieldsResponse.raise_for_status()
        getFieldsList = getFieldsResponse.json()['fields']
        fieldsNames = [field['name'] for field in getFieldsList if 'name' in field]

        # get the fields from the template, if specified, and merge those into the fields list
        if args.template is not None:
            try:
                # request template from elasticsearch and pull the mappings/properties (field list) out
                getTemplateResponse = requests.get('{}/{}/{}'.format(args.elasticUrl, ES_GET_TEMPLATE_URI, args.template))
                getTemplateResponse.raise_for_status()
                getTemplateInfo = getTemplateResponse.json()[args.template]['mappings']['properties']

                # a field should be merged if it's not already in the list we have from kibana, and it's
                # in the list of types we're merging (leave more complex types like nested and geolocation
                # to be handled naturally as the data shows up)
                mergeFieldTypes = ("date", "float", "integer", "ip", "keyword", "long", "short", "text")
                numberTypes = ('float', 'integer', 'long', 'short')
                stringTypes = ('keyword', 'text')
                for field in getTemplateInfo:
                    templateField = getTemplateInfo[field]
                    if ((field not in fieldsNames) and
                            ('type' in templateField) and
                            (templateField['type'] in mergeFieldTypes)):

                        esType = templateField['type']
                        # map the elasticsearch type onto the type name used in kibana's field list
                        if esType in numberTypes:
                            kibanaType = 'number'
                        elif esType in stringTypes:
                            kibanaType = 'string'
                        else:
                            kibanaType = esType

                        # create field dict in same format as those returned by GET_FIELDS_URI above
                        aggregatable = (esType != "text")
                        mergedFieldInfo = {
                            'name': field,
                            'esTypes': [esType],
                            'type': kibanaType,
                            'searchable': True,
                            'aggregatable': aggregatable,
                            'readFromDocValues': aggregatable,
                        }
                        fieldsNames.append(field)
                        getFieldsList.append(mergedFieldInfo)

            except Exception as e:
                # the template merge is best-effort: report the problem and carry on with kibana's own list
                eprint('"{}" raised for "{}", skipping template merge'.format(str(e), args.template))

        if debug:
            eprint('{} would have {} fields'.format(args.index, len(getFieldsList)))

        # define field formatting map for Kibana -> Arkime drilldown and other URL drilldowns
        #
        # see: https://github.com/cisagov/Malcolm/issues/133
        #      https://github.com/mmguero-dev/kibana-plugin-drilldownmenu
        #
        # fieldFormatMap is
        #    {
        #        "zeek.orig_h": {
        #            "id": "drilldown",
        #            "params": {
        #                "parsedUrl": {
        #                    "origin": "https://malcolm.local.lan",
        #                    "pathname": "/kibana/app/kibana",
        #                    "basePath": "/kibana"
        #                },
        #                "urlTemplates": [
        #                    null,
        #                    {
        #                        "url": "/idkib2mol/zeek.orig_h == {{value}}",
        #                        "label": "Arkime: zeek.orig_h == {{value}}"
        #                    }
        #                ]
        #            }
        #        },
        #        ...
        #    }
        fieldFormatMap = {}
        for field in getFieldsList:
            fieldName = field['name']
            # only fields whose name starts with a letter get drilldowns
            if not fieldName[:1].isalpha():
                continue

            # for Arkime to query by database field name, see moloch issue/PR 1461/1463
            valQuote = '"' if field['type'] == 'string' else ''
            valDbPrefix = '' if fieldName.startswith('zeek') else 'db:'
            # the first entry is None, as in the example above; the Arkime drilldown always comes next
            drilldownInfoParamsUrlTemplates = [
                None,
                {
                    'url': '/idkib2mol/{}{} == {}{{{{value}}}}{}'.format(valDbPrefix, fieldName, valQuote, valQuote),
                    'label': 'Arkime {}: {}{{{{value}}}}{}'.format(fieldName, valQuote, valQuote),
                },
            ]

            # at most one of the following categories adds extra drilldowns (first match wins)
            if (field['type'] == 'ip') or (re.search(r'[_\.-](h|ip)$', fieldName, re.IGNORECASE) is not None):
                # add drilldown for searching VirusTotal for IP addresses
                drilldownInfoParamsUrlTemplates.append({
                    'url': 'https://www.virustotal.com/en/ip-address/{{value}}/information/',
                    'label': 'VirusTotal IP: {{value}}',
                })

            elif re.search(r'(^|[\b_\.-])(md5|sha(1|256|384|512))\b', fieldName, re.IGNORECASE) is not None:
                # add drilldown for searching VirusTotal for hash signatures
                drilldownInfoParamsUrlTemplates.append({
                    'url': 'https://www.virustotal.com/gui/file/{{value}}/detection',
                    'label': 'VirusTotal Hash: {{value}}',
                })

            # NOTE(review): the (hit|signature(_?id)) group is optional, so this pattern also matches
            # names that merely end in a separator (optionally followed by "s") - confirm intent
            elif re.search(r'(^|[\b_\.-])(hit|signature(_?id))?s?$', fieldName, re.IGNORECASE) is not None:
                # add drilldown for searching the web for signature IDs
                drilldownInfoParamsUrlTemplates.append({
                    'url': 'https://duckduckgo.com/?q="{{value}}"',
                    'label': 'Web Search: {{value}}',
                })

            elif re.search(r'(^|src|dst|source|dest|destination|[\b_\.-])p(ort)?s?$', fieldName, re.IGNORECASE) is not None:
                # add drilldown for searching IANA for ports
                drilldownInfoParamsUrlTemplates.append({
                    'url': 'https://www.iana.org/assignments/service-names-port-numbers/service-names-port-numbers.xhtml?search={{value}}',
                    'label': 'Port Registry: {{value}}',
                })

            elif re.search(r'^(zeek\.service|protocol?|network\.protocol)$', fieldName, re.IGNORECASE) is not None:
                # add drilldown for searching IANA for services
                drilldownInfoParamsUrlTemplates.append({
                    'url': 'https://www.iana.org/assignments/service-names-port-numbers/service-names-port-numbers.xhtml?search={{value}}',
                    'label': 'Service Registry: {{value}}',
                })

            elif re.search(r'^(network\.transport|zeek\.proto|ipProtocol)$', fieldName, re.IGNORECASE) is not None:
                # add URL link for assigned transport protocol numbers
                drilldownInfoParamsUrlTemplates.append({
                    'url': 'https://www.iana.org/assignments/protocol-numbers/protocol-numbers.xhtml',
                    'label': 'Protocol Registry',
                })

            elif re.search(r'(as\.number|(src|dst)ASN|asn\.(src|dst))$', fieldName, re.IGNORECASE) is not None:
                # add drilldown for searching ARIN for ASN
                drilldownInfoParamsUrlTemplates.append({
                    'url': 'https://search.arin.net/rdap/?query={{value}}&searchFilter=asn',
                    'label': 'ARIN ASN: {{value}}',
                })

            elif re.search(r'(^zeek\.filetype$|mime[_\.-]?type)', fieldName, re.IGNORECASE) is not None:
                # add drilldown for searching mime/media/content types
                # TODO: '/' in URL is getting messed up somehow, maybe we need to url encode it manually? not sure...
                drilldownInfoParamsUrlTemplates.append({
                    'url': 'https://www.iana.org/assignments/media-types/{{value}}',
                    'label': 'Media Type Registry: {{value}}',
                })

            elif re.search(r'(^zeek_files\.extracted$)', fieldName, re.IGNORECASE) is not None:
                # add download for extracted/quarantined zeek files
                drilldownInfoParamsUrlTemplates.append({
                    'url': '/dl-extracted-files/quarantine/{{value}}',
                    'label': 'Download (if quarantined)',
                })
                drilldownInfoParamsUrlTemplates.append({
                    'url': '/dl-extracted-files/preserved/{{value}}',
                    'label': 'Download (if preserved)',
                })

            fieldFormatMap[fieldName] = {
                'id': 'drilldown',
                'params': {'urlTemplates': drilldownInfoParamsUrlTemplates},
            }

        # set the index pattern with our complete list of fields
        # (fields and fieldFormatMap are sent as JSON-encoded strings inside the JSON body)
        putIndexInfo = {
            'attributes': {
                'title': args.index,
                'fields': json.dumps(getFieldsList),
                'fieldFormatMap': json.dumps(fieldFormatMap),
            }
        }

        if not args.dryrun:
            putResponse = requests.put('{}/{}/{}'.format(args.kibanaUrl, PUT_INDEX_PATTERN_URI, indexId),
                                       headers={'Content-Type': 'application/json',
                                                'kbn-xsrf': 'true',
                                                'kbn-version': kibanaVersion, },
                                       data=json.dumps(putIndexInfo))
            putResponse.raise_for_status()

        # if we got this far, it probably worked!
        if args.dryrun:
            print("success (dry run only, no write performed)")
        else:
            print("success")

    else:
        print("failure (could not find Index ID for {})".format(args.index))
# run main() only when executed as a script, not when imported as a module
if __name__ == '__main__':
    main()