filter {

  # TODO: make the added fields not zeek-specific? (see ECS topic branch)
  # Every enrichment lookup lives in this filter, yet several of them still
  # write zeek-specific fields; this could be refactored/split.

  ######## MAC address OUI (manufacturer) lookup #################################################

  # Copy the LOGSTASH_OUI_LOOKUP environment variable (default "false") into
  # @metadata so the conditional below can test it.
  mutate {
    id => "mutate_add_field_env_logstash_oui_lookup"
    add_field => {
      "[@metadata][ENV_LOGSTASH_OUI_LOOKUP]" => "${LOGSTASH_OUI_LOOKUP:false}"
    }
  }

  if [@metadata][ENV_LOGSTASH_OUI_LOOKUP] == "true" {

    # srcMac/dstMac are arrays at this point, as Arkime expects them to be,
    # so only the first element of each is looked up.

    if [srcMac] and [srcMac][0] {

      # OUI lookup for the first source MAC address
      ieee_oui {
        id => "ieee_oui_srcMac"
        ouifile => "/usr/share/logstash/config/oui-logstash.txt"
        refresh_interval => 0
        source => "[srcMac][0]"
        target => "[zeek][orig_l2_oui]"
      }

      if [zeek][orig_l2_oui] {

        # lookup produced a value: merge it into the srcOui array and record a count of 1
        mutate {
          id => "mutate_merge_field_srcOui"
          merge => { "[srcOui]" => "[zeek][orig_l2_oui]" }
        }
        mutate {
          id => "mutate_add_field_srcOuiCnt"
          add_field => { "[srcOuiCnt]" => "1" }
        }

        # events carrying dhcp.mac (DHCP log type) also get srcOui/srcOuiCnt copied to dhcp.oui/dhcp.ouiCnt
        if [dhcp][mac] {
          mutate {
            id => "mutate_add_fields_dhcp_oui"
            add_field => { "[dhcp][oui]" => "%{[srcOui]}" }
          }
          mutate {
            id => "mutate_add_fields_dhcp_ouiCnt"
            add_field => { "[dhcp][ouiCnt]" => "%{[srcOuiCnt]}" }
          }
        }

      }

    } # end if [srcMac]

    if [dstMac] and [dstMac][0] {

      # OUI lookup for the first destination MAC address
      ieee_oui {
        id => "ieee_oui_dstMac"
        ouifile => "/usr/share/logstash/config/oui-logstash.txt"
        refresh_interval => 0
        source => "[dstMac][0]"
        target => "[zeek][resp_l2_oui]"
      }

      if [zeek][resp_l2_oui] {

        # lookup produced a value: merge it into the dstOui array and record a count of 1
        mutate {
          id => "mutate_merge_field_dstOui"
          merge => { "[dstOui]" => "[zeek][resp_l2_oui]" }
        }
        mutate {
          id => "mutate_add_field_dstOuiCnt"
          add_field => { "[dstOuiCnt]" => "1" }
        }

      }

    } # end if [dstMac]

  } # end if ENV_LOGSTASH_OUI_LOOKUP

################################################################################################

  ######## IP address class tagging, GeoIP/ASN lookups, and reverse DNS ###########################

  if ([srcIp]) {

    # tag the event "internal_source" when srcIp falls inside one of the listed networks
    # NOTE(review): "::/0" spans the entire IPv6 address space, so (assuming the cidr
    # filter matches IPv6 addresses against it) every IPv6 source is tagged internal and
    # skips the GeoIP/ASN/reverse-DNS lookups below - confirm this is intended.
    cidr {
      id => "cidr_add_tag_internal_source"
      add_tag => [ "internal_source" ]
      address => [ "%{srcIp}" ]
      network => [ "0.0.0.0/8", "10.0.0.0/8", "100.64.0.0/10", "127.0.0.0/8", "169.254.0.0/16", "172.16.0.0/12", "192.0.0.0/24",
                   "192.0.2.0/24", "192.88.99.0/24", "192.168.0.0/16", "198.18.0.0/15", "198.51.100.0/24", "203.0.113.0/24",
                   "224.0.0.0/4", "240.0.0.0/4", "255.255.255.255/32", "::/0", "::/128", "::1/128", "fc00::/7", "fe80::/10", "ff00::/8"]
    }

    if (!("internal_source" in [tags])) {

      mutate { id => "mutate_add_tag_external_source"
               add_tag => [ "external_source" ] }

      # map srcIp to GEO countries
      geoip {
        id => "geoip_srcIp_geo"
        source => "[srcIp]"
        target => "[zeek][source_geo]"
      }
      # ASN lookup for srcIp; the result is kept in @metadata and only used to build srcASN
      geoip {
        id => "geoip_srcIp_asn"
        default_database_type => "ASN"
        source => "[srcIp]"
        target => "[@metadata][orig_asn]"
      }
      if ([zeek][source_geo] and [zeek][source_geo][country_code2]) {
        mutate { id => "mutate_add_field_srcGEO"
                 add_field => { "[srcGEO]" => "%{[zeek][source_geo][country_code2]}" } }
      }
      if ([@metadata][orig_asn] and [@metadata][orig_asn][as_org] and [@metadata][orig_asn][asn]) {
        mutate { id => "mutate_add_field_srcASN"
                 add_field => { "[srcASN]" => "AS%{[@metadata][orig_asn][asn]} %{[@metadata][orig_asn][as_org]}" } }
      }

      # if reverse DNS for public IP addresses is enabled (via environment variable) do it
      mutate {
        add_field => { "[@metadata][ENV_LOGSTASH_REVERSE_DNS]" => "${LOGSTASH_REVERSE_DNS:false}" }
      }
      if ([@metadata][ENV_LOGSTASH_REVERSE_DNS] == "true") {
        # seed the target field with the IP; the dns filter then replaces it in place
        mutate {
          id => "mutate_add_field_srcip_for_dns"
          add_field => { "[zeek][source_ip_reverse_dns]" => "%{[srcIp]}" }
        }
        dns {
          id => "dns_source_ip_reverse_dns"
          reverse => [ "[zeek][source_ip_reverse_dns]" ]
          action => "replace"
          hit_cache_size => 8000
          hit_cache_ttl => 300
          failed_cache_size => 8000
          failed_cache_ttl => 60
        }
        # field still equals the IP (nothing was replaced): drop it
        if ([srcIp] == [zeek][source_ip_reverse_dns]) {
          mutate {
            id => "mutate_remove_field_source_ip_reverse_dns"
            remove_field => [ "[zeek][source_ip_reverse_dns]" ]
          }
        }
      }
    }

    # network.type: "ipv4" when srcIp matches 0.0.0.0/0, otherwise "ipv6"
    cidr {
      id => "cidr_detect_network_type_ipv4_source"
      add_field => { "[network][type]" => "ipv4" }
      address => [ "%{srcIp}" ]
      network => [ "0.0.0.0/0" ]
    }
    if (![network][type]) {
      # (the id says "ipv4" but this is the ipv6 fallback; id kept unchanged)
      mutate { id => "mutate_add_network_type_ipv4_source"
               add_field => { "[network][type]" => "ipv6" } }
    }

  } # if ([srcIp])

  if ([dstIp]) {

    # tag the event "internal_destination" when dstIp falls inside one of the listed networks
    # NOTE(review): "::/0" spans the entire IPv6 address space, so (assuming the cidr
    # filter matches IPv6 addresses against it) every IPv6 destination is tagged internal
    # and skips the GeoIP/ASN/reverse-DNS lookups below - confirm this is intended.
    cidr {
      id => "cidr_add_tag_internal_destination"
      add_tag => [ "internal_destination" ]
      address => [ "%{dstIp}" ]
      network => [ "0.0.0.0/8", "10.0.0.0/8", "100.64.0.0/10", "127.0.0.0/8", "169.254.0.0/16", "172.16.0.0/12", "192.0.0.0/24",
                   "192.0.2.0/24", "192.88.99.0/24", "192.168.0.0/16", "198.18.0.0/15", "198.51.100.0/24", "203.0.113.0/24",
                   "224.0.0.0/4", "240.0.0.0/4", "255.255.255.255/32", "::/0", "::/128", "::1/128", "fc00::/7", "fe80::/10", "ff00::/8"]
    }

    if (!("internal_destination" in [tags])) {

      mutate { id => "mutate_add_tag_external_destination"
               add_tag => [ "external_destination" ] }

      # map dstIp to GEO countries
      geoip {
        id => "geoip_dstIp_geo"
        source => "[dstIp]"
        target => "[zeek][destination_geo]"
      }
      # ASN lookup for dstIp; the result is kept in @metadata and only used to build dstASN
      geoip {
        id => "geoip_dstIp_asn"
        default_database_type => "ASN"
        source => "[dstIp]"
        target => "[@metadata][resp_asn]"
      }
      if ([zeek][destination_geo] and [zeek][destination_geo][country_code2]) {
        mutate { id => "mutate_add_field_dstGEO"
                 add_field => { "[dstGEO]" => "%{[zeek][destination_geo][country_code2]}" } }
      }
      if ([@metadata][resp_asn] and [@metadata][resp_asn][as_org] and [@metadata][resp_asn][asn]) {
        mutate { id => "mutate_add_field_dstASN"
                 add_field => { "[dstASN]" => "AS%{[@metadata][resp_asn][asn]} %{[@metadata][resp_asn][as_org]}" } }
      }

      # if reverse DNS for public IP addresses is enabled (via environment variable) do it
      # (only add the flag when the srcIp branch has not already added it)
      if (![@metadata][ENV_LOGSTASH_REVERSE_DNS]) {
        mutate {
          add_field => { "[@metadata][ENV_LOGSTASH_REVERSE_DNS]" => "${LOGSTASH_REVERSE_DNS:false}" }
        }
      }
      if ([@metadata][ENV_LOGSTASH_REVERSE_DNS] == "true") {
        # seed the target field with the IP; the dns filter then replaces it in place
        mutate {
          id => "mutate_add_field_dstip_for_dns"
          add_field => { "[zeek][destination_ip_reverse_dns]" => "%{[dstIp]}" }
        }
        dns {
          id => "dns_destination_ip_reverse_dns"
          reverse => [ "[zeek][destination_ip_reverse_dns]" ]
          action => "replace"
          hit_cache_size => 8000
          hit_cache_ttl => 300
          failed_cache_size => 8000
          failed_cache_ttl => 60
        }
        # field still equals the IP (nothing was replaced): drop it
        if ([dstIp] == [zeek][destination_ip_reverse_dns]) {
          mutate {
            id => "mutate_remove_field_destination_ip_reverse_dns"
            remove_field => [ "[zeek][destination_ip_reverse_dns]" ]
          }
        }
      }
    }

    # network.type is derived from dstIp only when the srcIp branch did not set it
    if (![network][type]) {
      cidr {
        id => "cidr_detect_network_type_ipv4_dest"
        add_field => { "[network][type]" => "ipv4" }
        address => [ "%{dstIp}" ]
        network => [ "0.0.0.0/0" ]
      }
      if (![network][type]) {
        # (the id says "ipv4" but this is the ipv6 fallback; id kept unchanged)
        mutate { id => "mutate_add_network_type_ipv4_dest"
                 add_field => { "[network][type]" => "ipv6" } }
      }
    }

  } # if ([dstIp])

  if ([dns][host]) {
    # if requested, look up DNS queries using freq_server.py to get entropy scores
    ruby {
      id => "ruby_dns_freq_lookup"
      init => "
        require 'net/http'
        require 'cgi'
        $freqLookupEnabled = ENV['FREQ_LOOKUP'] || 'false'
      "
      # freq_server.py returns a string like: (2.9333, 3.6353)
      code => "
        if ($freqLookupEnabled == 'true') then
          scoresv1 = Array.new
          scoresv2 = Array.new
          scoresTmp = Array.new
          begin
            event.get('[dns][host]').each { |query|
              if (query.length >= 4) and (query !~ /(ip6\.int|ip6\.arpa|in-addr\.arpa|b32\.i2p)$/i) then
                scoresTmp.clear
                scoresTmp.concat(Net::HTTP.get_response(URI.parse('http://freq:10004/measure/' + CGI.escape(query))).body.gsub(/(^\(|\)$|\s+)/, '').split(',').map(&:to_f))
                if (scoresTmp.length == 2) then
                  scoresv1 << scoresTmp[0]
                  scoresv2 << scoresTmp[1]
                end
              end
            }
          rescue Exception => e
            event.set('ruby_exception', 'ruby_dns_freq_lookup: ' + e.message)
          end
          event.set('[zeek][freq_score_v1]', scoresv1) unless (scoresv1.length == 0)
          event.set('[zeek][freq_score_v2]', scoresv2) unless (scoresv2.length == 0)
        end"
    }
  } # end if dns.host

  if ([dns][ip]) and ([dns][ip][0]) {
    # if this is a DNS record with an IP, GeoIP it as well
    geoip {
      id => "geoip_dns_ip_asn"
      default_database_type => "ASN"
      source => "[dns][ip][0]"
      target => "[@metadata][dns_asn]"
    }
    if ([@metadata][dns_asn] and [@metadata][dns_asn][as_org] and [@metadata][dns_asn][asn]) {
      # this is stupid, the %{} doesn't seem to be liked by mutate.merge
      mutate { id => "mutate_add_field_dns_asn"
               add_field => { "[@metadata][asn_str]" => "AS%{[@metadata][dns_asn][asn]} %{[@metadata][dns_asn][as_org]}" } }
      mutate { id => "mutate_merge_dns_asn"
               merge => { "[dns][ASN]" => "[@metadata][asn_str]" } }
    }

    geoip {
      id => "geoip_dns_ip_geo"
      source => "[dns][ip][0]"
      target => "[@metadata][dns_geo]"
    }
    if ([@metadata][dns_geo] and [@metadata][dns_geo][country_code2]) {
      mutate { id => "mutate_merge_dns_geo"
               merge => { "[dns][GEO]" => "[@metadata][dns_geo][country_code2]" } }
    }
  } # end if dns.ip

  if ([radius]) {

    # if this is a Radius record with IP addresses, GeoIP them as well
    if ([radius][framedIp]) and ([radius][framedIp][0]) {
      geoip {
        id => "geoip_radius_framedIp_asn"
        default_database_type => "ASN"
        source => "[radius][framedIp][0]"
        target => "[@metadata][radius_asn]"
      }
      if ([@metadata][radius_asn] and [@metadata][radius_asn][as_org] and [@metadata][radius_asn][asn]) {
        # this is stupid, the %{} doesn't seem to be liked by mutate.merge
        mutate { id => "mutate_add_field_radius_asn"
                 add_field => { "[@metadata][asn_str]" => "AS%{[@metadata][radius_asn][asn]} %{[@metadata][radius_asn][as_org]}" } }
        mutate { id => "mutate_merge_radius_asn"
                 merge => { "[radius][framedASN]" => "[@metadata][asn_str]" } }
      }

      geoip {
        id => "geoip_radius_framedIp_geo"
        source => "[radius][framedIp][0]"
        target => "[@metadata][radius_geo]"
      }
      if ([@metadata][radius_geo] and [@metadata][radius_geo][country_code2]) {
        mutate { id => "mutate_merge_radius_geo"
                 merge => { "[radius][framedGEO]" => "[@metadata][radius_geo][country_code2]" } }
      }
    } # end if radius.framedIp

    # The endpointIp lookup uses its own @metadata scratch fields. The framedIp lookup
    # above may already have populated [@metadata][radius_asn], [@metadata][radius_geo]
    # and [@metadata][asn_str] on this same event: add_field on an existing field appends
    # to it rather than replacing it, and a failed geoip lookup leaves a previously
    # populated target in place, so sharing those fields would leak framedIp results
    # into radius.endpointASN / radius.endpointGEO.
    if ([radius][endpointIp]) and ([radius][endpointIp][0]) {
      geoip {
        id => "geoip_radius_endpointIp_asn"
        default_database_type => "ASN"
        source => "[radius][endpointIp][0]"
        target => "[@metadata][radius_endpoint_asn]"
      }
      if ([@metadata][radius_endpoint_asn] and [@metadata][radius_endpoint_asn][as_org] and [@metadata][radius_endpoint_asn][asn]) {
        # this is stupid, the %{} doesn't seem to be liked by mutate.merge
        mutate { id => "mutate_add_field_radius_endpoint_asn"
                 add_field => { "[@metadata][radius_endpoint_asn_str]" => "AS%{[@metadata][radius_endpoint_asn][asn]} %{[@metadata][radius_endpoint_asn][as_org]}" } }
        mutate { id => "mutate_merge_radius_endpoint_asn"
                 merge => { "[radius][endpointASN]" => "[@metadata][radius_endpoint_asn_str]" } }
      }

      geoip {
        id => "geoip_radius_endpointIp_geo"
        source => "[radius][endpointIp][0]"
        target => "[@metadata][radius_endpoint_geo]"
      }
      if ([@metadata][radius_endpoint_geo] and [@metadata][radius_endpoint_geo][country_code2]) {
        mutate { id => "mutate_merge_radius_endpoint_geo"
                 merge => { "[radius][endpointGEO]" => "[@metadata][radius_endpoint_geo][country_code2]" } }
      }
    } # end if radius.endpointIp

  } # end if radius

  if ([zeek_cip_identity][socket_address]) {
    # if this is a zeek_cip_identity record with socket_address, ASN/GeoIP it as well
    geoip {
      id => "geoip_zeek_cip_identity_socket_address"
      default_database_type => "ASN"
      source => "[zeek_cip_identity][socket_address]"
      target => "[@metadata][zeek_cip_identity_asn]"
    }
    if ([@metadata][zeek_cip_identity_asn] and [@metadata][zeek_cip_identity_asn][as_org] and [@metadata][zeek_cip_identity_asn][asn]) {
      # this is stupid, the %{} doesn't seem to be liked by mutate.merge
      mutate { id => "mutate_add_field_zeek_cip_identity_asn"
               add_field => { "[@metadata][cip_asn_str]" => "AS%{[@metadata][zeek_cip_identity_asn][asn]} %{[@metadata][zeek_cip_identity_asn][as_org]}" } }
      mutate { id => "mutate_merge_zeek_cip_identity_asn"
               merge => { "[zeek_cip_identity][socket_address_asn]" => "[@metadata][cip_asn_str]" } }
    }
    # unlike the lookups above, the geo result is written to the event itself, not @metadata
    geoip {
      id => "geoip_zeek_cip_identity_socket_address_geo"
      source => "[zeek_cip_identity][socket_address]"
      target => "[zeek_cip_identity][socket_address_geo]"
    }
  } # end if zeek_cip_identity.socket_address

  if ([zeek_ssl][server_name]) {
    mutate {
      add_field => { "[@metadata][ENV_FREQ_LOOKUP]" => "${FREQ_LOOKUP:false}" }
    }
    if ([@metadata][ENV_FREQ_LOOKUP] == "true") {
      # if requested, look up zeek_ssl.server_name queries using freq_server.py to get entropy scores
      http {
        id => "rest_zeek_ssl_server_name_freq_lookup"
        url => "http://freq:10004/measure/%{[zeek_ssl][server_name]}"
        target_body => "[@metadata][zeek_ssl_server_name_freq]"
      }
      if ([@metadata][zeek_ssl_server_name_freq]) {
        # split a response of the form "(v1, v2)" into the two score fields
        grok {
          id => "grok_zeek_ssl_server_name_freq_parse"
          match => { "[@metadata][zeek_ssl_server_name_freq]" => [ "^\(%{NUMBER:[zeek][freq_score_v1]}, %{NUMBER:[zeek][freq_score_v2]}\)$" ] }
        }
      }
    }
  } # end if zeek_ssl.server_name

  ################################################################################################

  ######## JA3 community hashes lookup ###########################################################

  # ja3/ja3s are arrays at this point, as Arkime expects them to be

  if ([tls][ja3]) and ([tls][ja3][0]) {
    translate {
      id => "translate_ssl_ja3"
      field => "[tls][ja3][0]"
      destination => "[zeek_ssl][ja3_desc]"
      dictionary_path => "/etc/ja3.yaml"
    }
  }

  if ([tls][ja3s]) and ([tls][ja3s][0]) {
    translate {
      id => "translate_ssl_ja3s"
      field => "[tls][ja3s][0]"
      destination => "[zeek_ssl][ja3s_desc]"
      dictionary_path => "/etc/ja3.yaml"
    }
  }

  ################################################################################################

  # this identifies which node the log came from
  if ([host][name]) {
    mutate { id => "mutate_add_field_host_name_node"
             add_field => { "[node]" => "%{[host][name]}" } }
  } else {
    mutate { id => "mutate_add_field_logstash_node"
             add_field => { "[node]" => "logs" } }
  }

  if ([log][file][path]) {
    # trim path portion of log.file.path
    mutate { id => "mutate_gsub_field_zeek_log_file_path"
             gsub => [ "[log][file][path]", "^.*/", "" ] }
  }

  # remove some useless beats-related fields
  mutate {
    id => "mutate_remove_field_beats_useless"
    remove_field => [
      "[beat]",
      "[agent][ephemeral_id]",
      "[log][offset]",
      "[input][type]",
      "[prospector]",
      "[message]"
    ]
  }

} # end Filter