#!/bin/bash

# Copyright (c) 2021 Battelle Energy Alliance, LLC. All rights reserved.

# package up Zeek logs in a format more suitable for upload to Malcolm
#
# directory containing Zeek logs is a parent directory of directories/files named like
# YYYY-MM-DD/smb_mapping.04:00:00-05:00:00.log.gz
#

# strict mode: abort on a failing command, on use of an unset variable,
# and when any stage of a pipeline fails
set -e
set -u
set -o pipefail

# NOTE(review): ENCODING is not referenced in the visible portion of this script
ENCODING="utf-8"

# options
# -v      (verbose)
# -d dir  (base directory containing logs, e.g., the parent directory of
#          YYYY-MM-DD/smb_mapping.04:00:00-05:00:00.log.gz)

# parse command-line options
VERBOSE_FLAG=""       # "-v" when verbose output was requested, otherwise empty
LOG_BASE_DIR=$(pwd)   # defaults to the current working directory
while getopts 'vd:' OPTION; do
  case "$OPTION" in
    v)
      VERBOSE_FLAG="-v"
      ;;

    d)
      LOG_BASE_DIR="$OPTARG"
      ;;

    *)
      # getopts reports unknown options and missing arguments as "?"
      echo "script usage: ${0##*/} [-v] [-d directory]" >&2
      exit 1
      ;;
  esac
done
# discard the options that were just parsed
shift "$((OPTIND - 1))"

# fsize - display byte sizes human readable
#
# Arguments: $1 - text beginning with a byte count (e.g., the output of stat)
# Outputs:   $1 with the leading run of digits replaced by a scaled value and
#            a binary (1024-based) unit, e.g. "  512 B", "1.50 KiB", "1.00 PiB"
function fsize () {
  printf '%s\n' "$1" | LC_ALL=C awk '
    function human(x,    units, n, i) {
      # units in ascending order; the index advances once per division by 1024
      n = split("B KiB MiB GiB TiB PiB EiB ZiB YiB", units, " ")
      i = 1
      while (x >= 1024 && i < n) {
        x /= 1024
        i++
      }
      # plain byte counts are printed as integers, scaled values with 2 decimals
      if (i == 1)
        return sprintf("%5d %s", x, units[i])
      return sprintf("%0.2f %s", x, units[i])
    }
    { gsub(/^[0-9]+/, human($1)); print }'
}

# fdir - print the directory that contains a path
#
# Arguments: $1 - a filesystem path
# Outputs:   the parent directory of $1 when $1 is an existing regular file;
#            otherwise $1 itself, unchanged
function fdir () {
  if [[ -f "$1" ]]; then
    dirname -- "$1"
  else
    # printf rather than echo so values such as "-n" are printed literally
    printf '%s\n' "$1"
  fi
}

# create a temporary directory to store our results in (make sure /tmp is big enough to extract all of these logs into!)
WORKDIR="$(mktemp -d -t malcolm-zeek-XXXXXX)"

# chdir to the base directory containing the logs; if that fails, report it and
# remove the (still empty) temporary directory, since no exit handler has been
# installed at this point in the script
if ! pushd "$LOG_BASE_DIR" >/dev/null 2>&1; then
  echo "Unable to change to log directory '$LOG_BASE_DIR'" >&2
  rm -rf -- "$WORKDIR"
  exit 1
fi
# absolute, symlink-resolved form of the log directory
FULL_PWD="$(realpath "$(pwd)")"

# cleanup - on exit ensure the temporary directory is removed
#
# Globals: WORKDIR (read) - temporary directory to delete
# Exits with status 1 if the directory cannot be removed.
function cleanup {
  # return to the directory we started in; a popd failure (e.g., an empty
  # directory stack) must not abort the handler under "set -e" before the
  # temporary directory has been removed
  popd >/dev/null 2>&1 || true
  if ! rm -rf -- "$WORKDIR"; then
    echo "Failed to remove temporary directory '$WORKDIR'" >&2
    exit 1
  fi
}

if [[ -d "$WORKDIR" ]]; then
  # from here on, have cleanup remove the temporary directory when the script exits
  trap "cleanup" EXIT

  # expected archive path: [./]YEAR-MONTH-DAY/TYPE.HH:MM:SS-HH:MM:SS.log.gz
  # capture groups: 1="./" (optional)  2=year  3=month  4=day  5=log type
  #                 6,7,8=start hour,min,sec  9,10,11=end hour,min,sec
  PATTERN='(\./)?([0-9]+)-([0-9]+)-([0-9]+)/(.+)\.([0-9]+):([0-9]+):([0-9]+)-([0-9]+):([0-9]+):([0-9]+)\.log\.gz$'

  # find and unzip the compressed zeek logs below this directory into temporary subdirectories that make sense
  # (NUL-delimited on a dedicated descriptor so paths with whitespace or glob
  # characters are handled intact and stdin is left alone)
  while IFS= read -r -d '' GZ_LOG_FILE <&3; do
    # mirror the archive's source subdirectory beneath the work directory
    GZ_LOG_FILE_SUBDIR="$(dirname "$GZ_LOG_FILE")"
    GZ_LOG_FILE_DESTDIR="$WORKDIR"/"$GZ_LOG_FILE_SUBDIR"
    mkdir -p "$GZ_LOG_FILE_DESTDIR"

    # archives whose path does not follow the expected layout are skipped
    [[ $GZ_LOG_FILE =~ $PATTERN ]] || continue

    LOG_TYPE=${BASH_REMATCH[5]}
    # destination directory name is built from year_month_day_starthour
    DIR_DATE=${BASH_REMATCH[2]}_${BASH_REMATCH[3]}_${BASH_REMATCH[4]}_${BASH_REMATCH[6]}
    LOG_BASENAME="$(printf '%s\n' "$LOG_TYPE" | awk '{print tolower($0)}')".log

    # pick the first DIR_DATE.NN directory that does not already hold a log of
    # this type, so that several archives of one type never overwrite each other
    DIR_COUNT=0
    while true; do
      DEST_DIR="$WORKDIR"/"$DIR_DATE".$(printf '%02d' "$DIR_COUNT")
      DEST_FILE="$DEST_DIR"/"$LOG_BASENAME"
      [[ -e "$DEST_FILE" ]] || break
      DIR_COUNT=$((DIR_COUNT + 1))
    done

    mkdir -p "$DEST_DIR"/
    gunzip --to-stdout "$GZ_LOG_FILE" > "$DEST_FILE"

    if [[ -n "$VERBOSE_FLAG" ]]; then
      FILE_TYPE="$(file -b "$DEST_FILE")"
      FILE_SIZE="$(fsize "$(stat --printf="%s" "$DEST_FILE")")"
      echo "$DEST_FILE: $FILE_TYPE ($FILE_SIZE)"
    fi
  done 3< <(find . -type f -name "*.log.gz" -print0)

  # package up all of the log files in their respective directories under our temporary one
  REPACKAGED_LOGS_TARBALL="$FULL_PWD"/zeek-logs-compressed-$(date +'%Y%m%d_%H%M%S').tar.gz
  # pass -v to tar only when verbose mode is on; an empty flag adds no argument
  tar -c -z ${VERBOSE_FLAG:+"$VERBOSE_FLAG"} -C "$WORKDIR" -f "$REPACKAGED_LOGS_TARBALL" .

  # report the resulting tarball (with type and size when verbose)
  if [[ -n "$VERBOSE_FLAG" ]]; then
    FILE_TYPE="$(file -b "$REPACKAGED_LOGS_TARBALL")"
    FILE_SIZE="$(fsize "$(stat --printf="%s" "$REPACKAGED_LOGS_TARBALL")")"
    echo "$REPACKAGED_LOGS_TARBALL: $FILE_TYPE ($FILE_SIZE)"
  else
    echo "$REPACKAGED_LOGS_TARBALL"
  fi

fi