Files
DetectionLab/Vagrant/resources/malcolm/scripts/package_zeek_logs.sh
2021-08-06 10:35:01 +02:00

121 lines
3.7 KiB
Bash
Executable File

#!/bin/bash
# Copyright (c) 2021 Battelle Energy Alliance, LLC. All rights reserved.
# package up Zeek logs in a format more suitable for upload to Malcolm
#
# directory containing Zeek logs is a parent directory of directories/files named like smb_mapping.04:00:00-05:00:00.log.gz
#
# strict mode: abort on command failure, on use of an unset variable, and on a
# failure in any stage of a pipeline
set -e
set -u
set -o pipefail
ENCODING="utf-8"
# options
# -v (verbose)
# -d dir (base directory containing logs, e.g., the parent directory of smb_mapping.04:00:00-05:00:00.log.gz )
# parse command-line options
VERBOSE_FLAG=""
LOG_BASE_DIR=$(pwd)
while getopts 'vd:' OPTION; do
  case "$OPTION" in
    v)
      # kept as the literal flag so it can be handed straight to tar later
      VERBOSE_FLAG="-v"
      ;;
    d)
      LOG_BASE_DIR="$OPTARG"
      ;;
    *)
      # getopts reports unknown options and missing arguments itself and then
      # sets OPTION to "?"; show the usage line and bail out
      echo "script usage: $(basename "$0") [-v] [-d directory]" >&2
      exit 1
      ;;
  esac
done
# drop the options that were consumed, leaving only positional arguments
shift "$((OPTIND - 1))"
# fsize - display byte sizes human readable
#
# Arguments: $1 - a line of text starting with a byte count (e.g. the output of
#                 `stat --printf="%s"`)
# Outputs:   the same line on stdout with the leading number rewritten using
#            binary (1024-based) units: plain bytes as "<n> B", anything larger
#            with two decimals, e.g. "1.50 KiB"
function fsize () {
  printf '%s\n' "$1" | awk '
    function human(x,    units, count, idx) {
      # units in ascending order; indexing an array avoids depending on the
      # exact padding inside a fixed-width string
      count = split("B KiB MiB GiB TiB PiB EiB ZiB YiB", units, " ")
      idx = 1
      while (x >= 1024 && idx < count) {
        x /= 1024
        idx++
      }
      if (idx == 1)
        return sprintf("%d %s", x, units[idx])
      return sprintf("%0.2f %s", x, units[idx])
    }
    # only the leading run of digits is replaced; the rest of the line is kept
    { sub(/^[0-9]+/, human($1)); print }'
}
# fdir - print the directory belonging to a path
#
# Arguments: $1 - a path
# Outputs:   the parent directory when $1 is an existing regular file,
#            otherwise $1 itself unchanged
function fdir () {
  if [[ -f "$1" ]]; then
    # "--" keeps a path that starts with "-" from being parsed as an option
    dirname -- "$1"
  else
    printf '%s\n' "$1"
  fi
}
# chdir to the base directory containing the logs; this happens before the
# temporary directory is created so that an unusable directory is reported and
# nothing is left behind in /tmp
if ! pushd "$LOG_BASE_DIR" >/dev/null 2>&1; then
  echo "Unable to change to log directory '$LOG_BASE_DIR'" >&2
  exit 1
fi
FULL_PWD="$(realpath "$(pwd)")"
# create a temporary directory to store our results in (make sure /tmp is big enough to extract all of these logs into!)
WORKDIR="$(mktemp -d -t malcolm-zeek-XXXXXX)"
# cleanup - on exit ensure the temporary directory is removed
#
# Globals: WORKDIR (read) - directory that is deleted recursively
# Returns: exits the script with status 1 if the directory cannot be removed
function cleanup {
  # best effort: a failing popd (empty directory stack, or the previous
  # directory no longer exists) must not abort under `set -e` before the
  # removal below has run
  popd >/dev/null 2>&1 || true
  if ! rm -rf -- "$WORKDIR"; then
    echo "Failed to remove temporary directory '$WORKDIR'" >&2
    exit 1
  fi
}
if [ -d "$WORKDIR" ]; then
  # ensure the temporary directory is removed when the script exits (works for
  # clean exit, SIGTERM, and SIGINT/Ctrl-C)
  trap "cleanup" EXIT

  # expected layout: <year>-<month>-<day>/<type>.<HH>:<MM>:<SS>-<HH>:<MM>:<SS>.log.gz
  # capture groups: 1 optional "./"   2 year   3 month   4 day   5 type
  #                 6-8 start hour/min/sec     9-11 end hour/min/sec
  PATTERN='(\./)?([0-9]+)-([0-9]+)-([0-9]+)/(.+)\.([0-9]+):([0-9]+):([0-9]+)-([0-9]+):([0-9]+):([0-9]+)\.log\.gz$'

  # find and unzip the compressed zeek logs below this directory into temporary subdirectories that make sense
  # (NUL-delimited so paths containing whitespace or glob characters stay intact)
  while IFS= read -r -d '' GZ_LOG_FILE; do
    # mirror the log's source subdirectory under the work directory
    # NOTE(review): nothing below writes into this mirror, so it appears to end
    # up in the tarball as an empty directory - confirm whether that is wanted
    GZ_LOG_FILE_SUBDIR="$(dirname "$GZ_LOG_FILE")"
    GZ_LOG_FILE_DESTDIR="$WORKDIR"/"$GZ_LOG_FILE_SUBDIR"
    mkdir -p "$GZ_LOG_FILE_DESTDIR"

    # files that do not follow the expected naming scheme are skipped
    if [[ $GZ_LOG_FILE =~ $PATTERN ]]; then
      LOG_TYPE=${BASH_REMATCH[5]}
      # destination directories are keyed by date plus the starting hour
      DIR_DATE=${BASH_REMATCH[2]}_${BASH_REMATCH[3]}_${BASH_REMATCH[4]}_${BASH_REMATCH[6]}
      LOG_BASENAME="$(printf '%s\n' "$LOG_TYPE" | awk '{print tolower($0)}')".log

      # pick the first numbered directory (.00, .01, ...) for this date/hour
      # that does not already hold a log of this type, so several logs of the
      # same type from the same hour never overwrite each other
      DIR_COUNT=0
      while true; do
        DEST_DIR="$WORKDIR"/"$DIR_DATE".$(printf '%02d' "$DIR_COUNT")
        DEST_FILE="$DEST_DIR"/"$LOG_BASENAME"
        [[ -e "$DEST_FILE" ]] || break
        DIR_COUNT=$((DIR_COUNT+1))
      done

      mkdir -p "$DEST_DIR"/
      gunzip --to-stdout "$GZ_LOG_FILE" > "$DEST_FILE"

      if [[ -n $VERBOSE_FLAG ]]; then
        FILE_TYPE="$(file -b "$DEST_FILE")"
        FILE_SIZE="$(fsize "$(stat --printf="%s" "$DEST_FILE")")"
        echo "$DEST_FILE: $FILE_TYPE ($FILE_SIZE)"
      fi
    fi
  done < <(find . -type f -name "*.log.gz" -print0)

  # package up all of the log files in their respective directories under our temporary one
  REPACKAGED_LOGS_TARBALL="$FULL_PWD"/zeek-logs-compressed-$(date +'%Y%m%d_%H%M%S').tar.gz
  # the verbose flag is only passed when set, so an empty value never becomes
  # an empty argument; the tarball path is quoted in case it contains spaces
  tar -c -z ${VERBOSE_FLAG:+"$VERBOSE_FLAG"} -C "$WORKDIR" -f "$REPACKAGED_LOGS_TARBALL" .

  if [[ -n $VERBOSE_FLAG ]]; then
    FILE_TYPE="$(file -b "$REPACKAGED_LOGS_TARBALL")"
    FILE_SIZE="$(fsize "$(stat --printf="%s" "$REPACKAGED_LOGS_TARBALL")")"
    echo "$REPACKAGED_LOGS_TARBALL: $FILE_TYPE ($FILE_SIZE)"
  else
    # non-verbose runs print only the path of the tarball that was created
    echo "$REPACKAGED_LOGS_TARBALL"
  fi
fi