#!/bin/bash

# Copyright (c) 2021 Battelle Energy Alliance, LLC. All rights reserved.

# for files (sort -V (natural)) under /data/zeek that:
#   - are not in processed/ or current/ or upload/ or extract_files/ (-prune)
#   - are archive files
#   - are not in use (fuser -s)
# 1. move file to processed/ (preserving original subdirectory hierarchy, if any)
# 2. calculate tags based on splitting the file path and filename (splitting
#    on [,-/_.])

# number of archives prepared in parallel (handed to xargs -P below)
FILEBEAT_PREPARE_PROCESS_COUNT=1

# ensure only one instance of this script can run at a time
LOCKDIR="/tmp/zeek-beats-process-folder"

# Directory containing this script. Assigned separately from the export so a
# failing cd/pwd is not masked by the (always successful) export builtin.
SCRIPT_DIR="$(cd -P -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)" || {
  echo "Failed to determine script directory" >&2
  exit 1
}
# exported: the per-archive "bash -c" worker sources a helper from this directory
export SCRIPT_DIR

# exported: the worker runs this script on every extracted .log file
export ZEEK_LOG_FIELD_BITMAP_SCRIPT="$SCRIPT_DIR/zeek-log-field-bitmap.py"

# exported: the worker derives tags from the archive path only when this is "true"
export ZEEK_LOG_AUTO_TAG="${AUTO_TAG:-true}"

# directory that is scanned for archives; overridable through FILEBEAT_ZEEK_DIR
ZEEK_LOGS_DIR="${FILEBEAT_ZEEK_DIR:-/data/zeek/}"

# Remove the lock directory on exit.
# Globals:   LOCKDIR (read) - lock directory created with mkdir by this script
# Outputs:   an error message on stderr when the directory cannot be removed
# Returns:   returns normally on success; exits the shell with status 1 if
#            rmdir fails
function cleanup {
  # quoted and "--"-guarded so unusual lock paths are passed through verbatim
  if ! rmdir -- "$LOCKDIR"; then
    echo "Failed to remove lock directory '$LOCKDIR'" >&2
    exit 1
  fi
}

# Main section: runs only when the lock directory could be created, i.e. no
# other instance currently holds the lock (mkdir is the atomic test-and-set).
if mkdir "$LOCKDIR"; then
  # ensure that if we "grabbed a lock", we release it (works for clean exit, SIGTERM, and SIGINT/Ctrl-C)
  trap cleanup EXIT

  # get new zeek logs ready for processing; if the directory is unavailable,
  # stop here (the EXIT trap releases the lock) instead of letting find/mv
  # operate on whatever the current directory happens to be
  cd "$ZEEK_LOGS_DIR" || exit 1

  # Pipeline: list every file outside the pruned directories together with its
  # MIME type, keep the archive types, cut the path back out (text before the
  # first ":", so paths containing ":" or newlines are not supported), sort
  # naturally and hand each path to a worker shell.
  #
  # The path is passed to the worker as the positional parameter $1 rather than
  # being spliced into the script text, so quotes, "$" or backticks in a file
  # name are treated as data and never as shell code. "-d \n" turns off xargs
  # quote processing for the same reason; "-I" already implies one input line
  # per invocation, so no "-n 1" is needed.
  find . -path ./processed -prune -o -path ./current -prune -o -path ./upload -prune -o -path ./extract_files -prune -o -type f -exec file --mime-type "{}" \; \
    | grep -P "(application/gzip|application/x-gzip|application/x-7z-compressed|application/x-bzip2|application/x-cpio|application/x-lzip|application/x-lzma|application/x-rar-compressed|application/x-tar|application/x-xz|application/zip)" \
    | awk -F: '{print $1}' \
    | sort -V \
    | xargs -d '\n' -P "$FILEBEAT_PREPARE_PROCESS_COUNT" -I '{}' bash -c '
    # worker: $1 is the path of one archive, relative to the zeek logs directory
    SRC="$1"

    # only handle archives that no process currently has open
    if ! fuser -s "$SRC" 2>/dev/null; then
      # provides in_array (used for tag de-duplication below)
      . "$SCRIPT_DIR/filebeat-process-zeek-folder-functions.sh"

      PROCESS_TIME=$(date +%s%N)
      SOURCEDIR="$(dirname "$SRC")"
      DESTDIR="./processed/$SOURCEDIR"
      DESTNAME="$DESTDIR/$(basename "$SRC")"
      # the nanosecond timestamp keeps extraction directories unique per run
      DESTDIR_EXTRACTED="${DESTNAME}_${PROCESS_TIME}"
      LINKDIR="./current"

      TAGS=()
      if [[ "$ZEEK_LOG_AUTO_TAG" = "true" ]]; then
        # drop everything from the last "." on, then split the rest on , - / _ .
        IFS=",-/_." read -r -a SOURCESPLIT <<< "${SRC%.*}"
        echo "\"$SRC\" -> \"${DESTNAME}\""
        for TAG_CANDIDATE in "${SOURCESPLIT[@]}"; do
          if ! in_array TAGS "$TAG_CANDIDATE"; then
            # skip empty pieces, purely numeric/dash pieces, "tar" and the
            # AUTOZEEK / AUTOCARVE* markers
            if [[ -n $TAG_CANDIDATE && ! $TAG_CANDIDATE =~ ^[0-9-]+$ && $TAG_CANDIDATE != "tar" && $TAG_CANDIDATE != "AUTOZEEK" && ! $TAG_CANDIDATE =~ ^AUTOCARVE ]]; then
              TAGS+=("${TAG_CANDIDATE}")
            fi
          fi
        done
      fi

      mkdir -p "$DESTDIR"
      mkdir -p "$DESTDIR_EXTRACTED"
      mv -v "$SRC" "$DESTNAME"
      python3 -m pyunpack.cli "$DESTNAME" "$DESTDIR_EXTRACTED"

      # link every extracted .log into current/ as NAME(TAGS,TIME[,BITMAP]).log
      find "$DESTDIR_EXTRACTED" -type f -name "*.log" | while IFS= read -r LOGFILE; do
        PROCESS_TIME=$(date +%s%N)
        TAGS_JOINED=$(printf "%s," "${TAGS[@]}")${PROCESS_TIME}
        # first output line of the field-bitmap script; may be empty
        FIELDS_BITMAP="$("$ZEEK_LOG_FIELD_BITMAP_SCRIPT" "$LOGFILE" | head -n 1)"
        LINKNAME_BASE="$(basename "$LOGFILE" .log)"
        if [[ -n $FIELDS_BITMAP ]]; then
          LINKNAME="${LINKNAME_BASE}(${TAGS_JOINED},${FIELDS_BITMAP}).log"
        else
          LINKNAME="${LINKNAME_BASE}(${TAGS_JOINED}).log"
        fi
        # refresh the modification time before exposing the file in current/
        touch "$LOGFILE"
        ln -sfr "$LOGFILE" "$LINKDIR/$LINKNAME"
      done
    fi
    ' bash '{}'

fi