Files
DetectionLab/Vagrant/resources/malcolm/filebeat/scripts/filebeat-process-zeek-folder.sh
2021-08-06 10:35:01 +02:00

93 lines
3.4 KiB
Bash
Executable File

#!/bin/bash
# Copyright (c) 2021 Battelle Energy Alliance, LLC. All rights reserved.
# for files (sort -V (natural)) under /data/zeek that:
# - are not in processed/ or current/ or upload/ or extract_files/ (-prune)
# - are archive files
# - are not in use (fuser -s)
# 1. move file to processed/ (preserving original subdirectory hierarchy, if any)
# 2. calculate tags based on splitting the file path and filename (splitting
# on any of the characters , - / _ and .)
# number of archives handled in parallel (handed to xargs -P further down)
FILEBEAT_PREPARE_PROCESS_COUNT=1

# ensure only one instance of this script can run at a time
LOCKDIR="/tmp/zeek-beats-process-folder"

# Values needed by the per-archive child shells; they are exported together
# below so that the bash -c worker can read them from its environment.
SCRIPT_DIR="$( cd -P "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
ZEEK_LOG_FIELD_BITMAP_SCRIPT="${SCRIPT_DIR}/zeek-log-field-bitmap.py"
ZEEK_LOG_AUTO_TAG="${AUTO_TAG:-true}"
export SCRIPT_DIR ZEEK_LOG_FIELD_BITMAP_SCRIPT ZEEK_LOG_AUTO_TAG

# directory scanned for archives; overridable through FILEBEAT_ZEEK_DIR
ZEEK_LOGS_DIR="${FILEBEAT_ZEEK_DIR:-/data/zeek/}"
# remove the lock directory on exit
# Globals:  LOCKDIR (read) - path of the lock directory to remove
# Outputs:  an error message on stderr when the directory cannot be removed
# Exits:    terminates the script with status 1 if rmdir fails
function cleanup {
  # Quoted (and guarded with --) so a lock path containing whitespace or a
  # leading dash is still treated as a single directory operand.
  if ! rmdir -- "$LOCKDIR"; then
    echo "Failed to remove lock directory '$LOCKDIR'" >&2
    exit 1
  fi
}
# Try to take the lock: mkdir fails when the directory already exists, in which
# case another instance holds the lock and this run does nothing further.
if mkdir -- "$LOCKDIR"; then

  # ensure that if we "grabbed a lock", we release it (works for clean exit, SIGTERM, and SIGINT/Ctrl-C)
  trap "cleanup" EXIT

  # get new zeek logs ready for processing
  # Every path used below is relative to the log directory, so stop here if it
  # cannot be entered rather than scanning and moving files somewhere else.
  if ! cd "$ZEEK_LOGS_DIR"; then
    echo "Failed to change to zeek log directory '$ZEEK_LOGS_DIR'" >&2
    exit 1
  fi

  # Pipeline: list regular files outside the pruned directories together with
  # their MIME type, keep the archive types, strip the type column, sort the
  # names in natural (version) order and run one worker shell per file.
  # The file name is handed to the worker as a positional parameter rather than
  # being substituted into the script text, so special characters in a name are
  # never parsed as shell code. -n is not given because xargs treats it as
  # mutually exclusive with -I.
  find . -path ./processed -prune -o -path ./current -prune -o -path ./upload -prune -o -path ./extract_files -prune -o -type f -exec file --mime-type "{}" \; \
    | grep -P "(application/gzip|application/x-gzip|application/x-7z-compressed|application/x-bzip2|application/x-cpio|application/x-lzip|application/x-lzma|application/x-rar-compressed|application/x-tar|application/x-xz|application/zip)" \
    | awk -F: '{print $1}' \
    | sort -V \
    | xargs -P "$FILEBEAT_PREPARE_PROCESS_COUNT" -I '{}' bash -c '
      # path of the archive to process, relative to the zeek log directory
      SOURCEFILE="$1"
      # drop the positional parameter so the helper file sourced below is read
      # with none set, exactly as it was before the name was passed this way
      shift

      # only touch archives that no process currently has open
      if ! fuser -s "$SOURCEFILE" 2>/dev/null; then

        # provides in_array (used for tag de-duplication below)
        . "$SCRIPT_DIR/filebeat-process-zeek-folder-functions.sh"

        PROCESS_TIME=$(date +%s%N)
        SOURCEDIR="$(dirname "$SOURCEFILE")"
        DESTDIR="./processed/$SOURCEDIR"
        DESTNAME="$DESTDIR/$(basename "$SOURCEFILE")"
        # extraction directory is made unique with the nanosecond timestamp
        DESTDIR_EXTRACTED="${DESTNAME}_${PROCESS_TIME}"
        LINKDIR="./current"

        TAGS=()
        if [[ "$ZEEK_LOG_AUTO_TAG" = "true" ]]; then
          # strip everything from the last dot onwards, then split the rest of
          # the path on , - / _ and . to obtain the tag candidates
          IFS=",-/_." read -r -a SOURCESPLIT <<< "${SOURCEFILE%.*}"
          echo "\"$SOURCEFILE\" -> \"${DESTNAME}\""
          for index in "${!SOURCESPLIT[@]}"; do
            TAG_CANDIDATE="${SOURCESPLIT[index]}"
            if ! in_array TAGS "$TAG_CANDIDATE"; then
              # skip empty and purely numeric/dash pieces and the reserved words
              if [[ -n $TAG_CANDIDATE && ! $TAG_CANDIDATE =~ ^[0-9-]+$ && $TAG_CANDIDATE != "tar" && $TAG_CANDIDATE != "AUTOZEEK" && ! $TAG_CANDIDATE =~ ^AUTOCARVE ]]; then
                TAGS+=("${TAG_CANDIDATE}")
              fi
            fi
          done
        fi

        mkdir -p "$DESTDIR"
        mkdir -p "$DESTDIR_EXTRACTED"
        mv -v "$SOURCEFILE" "$DESTNAME"
        python3 -m pyunpack.cli "$DESTNAME" "$DESTDIR_EXTRACTED"

        # link every extracted .log file into current/ under a name of the
        # form base(tags,timestamp[,bitmap]).log
        find "$DESTDIR_EXTRACTED" -type f -name "*.log" | while IFS= read -r LOGFILE; do
          PROCESS_TIME=$(date +%s%N)
          TAGS_JOINED=$(printf "%s," "${TAGS[@]}")${PROCESS_TIME}
          # first line printed by the field bitmap script, if any
          FIELDS_BITMAP="$("$ZEEK_LOG_FIELD_BITMAP_SCRIPT" "$LOGFILE" | head -n 1)"
          LINKNAME_BASE="$(basename "$LOGFILE" .log)"
          if [[ -n $FIELDS_BITMAP ]]; then
            LINKNAME="${LINKNAME_BASE}(${TAGS_JOINED},${FIELDS_BITMAP}).log"
          else
            LINKNAME="${LINKNAME_BASE}(${TAGS_JOINED}).log"
          fi
          # NOTE(review): presumably refreshes the modification time so the
          # consumer of current/ treats the log as new - confirm
          touch "$LOGFILE"
          # relative symlink, replacing any existing link of the same name
          ln -sfr "$LOGFILE" "$LINKDIR/$LINKNAME"
        done
      fi
    ' bash '{}'

fi