#!/bin/bash
# Optional leading arguments (consumed in this order):
#   debug   - enable command tracing (set -x)
#   debug2  - enable verbose tracing (set -xvT)
#   trace   - record the flag in _TRACE
# _DEBUG/_TRACE are only assigned here; presumably they are read by the
# sourced common files -- TODO confirm.
if [[ "$1" == 'debug' ]]; then set -x; _DEBUG=$1; shift; fi
if [[ "$1" == 'debug2' ]]; then set -xvT; _DEBUG=$1; shift; fi
if [[ "$1" == 'trace' ]]; then _TRACE=$1; shift; fi

####################################################################
# - Newbots -
# Find new Bots not already in the badbots_map and whitebots_map
#
# Globals (expected from ../conf/private/<script name>.conf):
#   LOGBASE       directory whose contents are searched recursively
#   WHITEBOT_MAP  list of already known good bots
#   BADBOT_MAP    list of already known bad bots
#   NEWBOTS_OUT   output file; truncated and rewritten on every run
####################################################################

#------------------------
REALPATH=$(realpath "$0") || exit 1
WHERE=$(dirname "$REALPATH")
ME=$(basename "$REALPATH")

# All following includes are relative to the script directory, so a failed
# cd must abort instead of sourcing files from an unrelated directory.
cd "$WHERE" || { printf 'cannot cd to %s\n' "$WHERE" >&2; exit 1; }

. ../system.conf
. ../common.conf
. ../common.bashlib
#------------------------

# Private stuff
. "../conf/private/$ME.conf"

# LOGBASE, WHITEBOT_MAP, BADBOT_MAP and NEWBOTS_OUT were moved into the conf
# file. Refuse to run without them: an empty LOGBASE would turn
# "$LOGBASE"/* into /* and make grep scan the whole filesystem.
: "${LOGBASE:?LOGBASE must be set in the conf file}"
: "${WHITEBOT_MAP:?WHITEBOT_MAP must be set in the conf file}"
: "${BADBOT_MAP:?BADBOT_MAP must be set in the conf file}"
: "${NEWBOTS_OUT:?NEWBOTS_OUT must be set in the conf file}"

# Scratch file holding the unique candidate names; created with mktemp so
# that concurrent runs do not collide and the name is not predictable.
potential_bots=''

# Remove the scratch file (if it was created).
cleanup() {
  if [[ -n "$potential_bots" ]]; then
    rm -f -- "$potential_bots"
  fi
}

# Clean up on every exit path. Signals must terminate the script explicitly,
# otherwise bash would run the handler and then continue with the next command.
trap cleanup EXIT
trap 'exit 129' HUP
trap 'exit 130' INT
trap 'exit 143' TERM

potential_bots=$(mktemp) || { printf 'mktemp failed\n' >&2; exit 1; }

# Start with an empty result list.
: > "$NEWBOTS_OUT" || exit 1

# Search the logs for lines with 'Bot' inside a double-quoted field, take the
# 6th '"'-separated field (the User-Agent in Apache combined log format) and
# extract the words ending in 'Bot' (case-sensitive).
# -h suppresses the file name prefix so that a '"' in a log file name cannot
# shift the field index used by awk.
grep -Rh '"[^"]*Bot[^"]*"' "$LOGBASE"/* \
  | awk -F'"' '{print $6}' \
  | grep -oE '\b[A-Za-z0-9_-]+Bot\b' \
  | sort -u > "$potential_bots"

# Keep only the names that appear in neither map.
# NOTE(review): -x requires the bot name to be a complete line of the map
# file; if the maps hold "name value" pairs this never matches -- confirm
# against the actual map format.
while IFS= read -r botname; do
  if ! grep -qxF -- "$botname" "$WHITEBOT_MAP" \
    && ! grep -qxF -- "$botname" "$BADBOT_MAP"; then
    printf '%s\n' "$botname" >> "$NEWBOTS_OUT"
  fi
done < "$potential_bots"