Here is a simple bash script that emails a daily report of a search bot's successful (200) and failed (404) hits:
#!/bin/bash
# bot_report.sh
# usage: ./bot_report.sh [botName] [logPath]
# default: ./bot_report.sh Googlebot /var/log/httpd/access_log
######################################
# Run this in a daily cron           #
# 59 23 * * * /path/to/bot_report.sh #
######################################
# Commands: absolute paths of the external tools used by the functions below.
GREP=/bin/grep
DATE=/bin/date
MKDIR=/bin/mkdir
AWK=/bin/awk
SORT=/bin/sort
UNIQ=/usr/bin/uniq
TMPWATCH=/usr/sbin/tmpwatch
CAT=/bin/cat
MAIL=/bin/mail
ECHO=/bin/echo

# Global Variables
DEFAULT_BOT_NAME=Googlebot
DEFAULT_LOG_FILE=/var/log/httpd/access_log

# Positional arguments override the defaults; an empty argument is treated
# the same as a missing one.
#   $1 - bot name to look for in the log lines
#   $2 - path of the access log to scan
BOT_NAME=${1:-$DEFAULT_BOT_NAME}
LOG_FILE=${2:-$DEFAULT_LOG_FILE}

# Turn the log path into a flat file-name prefix (every "/" becomes "_") so
# that extracts of different logs do not collide in the temp directory.
PREFIX_LOG=${LOG_FILE//\//_}
TMP_LOG_PATH=/tmp/bot_report
# One extract per log file and per calendar day (date formatted as YYYY-MM-DD).
TMP_LOG_FILE=${TMP_LOG_PATH}/${PREFIX_LOG}_$("$DATE" +%F).log
TMP_REPORT_FILE=${TMP_LOG_PATH}/${BOT_NAME}_report.txt
EMAIL=user@domain.tld

###########################
# Nothing to change below #
###########################
#######################################
# Extract today's entries from the access log into the temp extract file.
# An extract that already exists is reused as-is, so the log is scanned at
# most once per extract file.
# Globals:   DATE, GREP, MKDIR, LOG_FILE, TMP_LOG_PATH, TMP_LOG_FILE (read)
# Outputs:   matching log lines to TMP_LOG_FILE; diagnostics to stderr
# Returns:   0 on success (including a day without entries); non-zero when
#            the temp directory cannot be created or the log cannot be read
#######################################
tmp_file_out() {
  local today status

  # Apache-style access logs stamp entries as dd/Mon/yyyy with English month
  # abbreviations, so format the date in the C locale whatever LC_TIME says.
  today=$(LC_ALL=C "$DATE" +%d/%b/%Y) || return

  [ -d "$TMP_LOG_PATH" ] || "$MKDIR" -p -- "$TMP_LOG_PATH" || return

  # Reuse an extract that was already produced.
  [ -f "$TMP_LOG_FILE" ] && return 0

  # Bail out before the redirection below creates an empty extract that
  # would otherwise be reused for the rest of the day.
  if [ ! -r "$LOG_FILE" ]; then
    printf 'bot_report: cannot read log file: %s\n' "$LOG_FILE" >&2
    return 1
  fi

  "$GREP" -F -- "$today" "$LOG_FILE" > "$TMP_LOG_FILE"
  status=$?
  # grep exits 1 when nothing matched, which is a legitimate empty day.
  # Anything higher is a real error: do not leave a partial extract behind.
  if [ "$status" -gt 1 ]; then
    rm -f -- "$TMP_LOG_FILE"
    return "$status"
  fi
  return 0
}
#######################################
# Remove temp files older than a day from the temp directory.
# Uses tmpwatch when it is available; otherwise falls back to find, since
# tmpwatch is not installed on every system.
# Globals:   TMPWATCH, TMP_LOG_PATH (read)
# Returns:   0 when there is nothing to clean, otherwise the status of the
#            cleanup command
#######################################
tmp_file_clean() {
  # Nothing to do (and nothing safe to do) without an existing directory.
  [ -d "$TMP_LOG_PATH" ] || return 0

  if command -v "$TMPWATCH" > /dev/null 2>&1; then
    "$TMPWATCH" 24 "$TMP_LOG_PATH"
  else
    # Approximates `tmpwatch 24`: delete regular files that were last
    # modified at least 24 hours ago.
    find "$TMP_LOG_PATH" -type f -mtime +0 -exec rm -f -- {} +
  fi
}
#######################################
# Print "count path" lines, most requested first, for the bot's requests
# that were answered with the given HTTP status.
# The status is compared against field 9 and the path is taken from field 7,
# which is where they sit in the common/combined access log layout. Matching
# the status field only keeps a response *size* of 200 or 404 from being
# counted as a status.
# Globals:   AWK, SORT, UNIQ, BOT_NAME, TMP_LOG_FILE (read)
# Arguments: $1 - HTTP status code to select
# Outputs:   the ranked hit list on stdout
#######################################
_report_hits() {
  local status=$1

  # BOT_NAME is matched as a regular expression against the whole line.
  "$AWK" -v bot="$BOT_NAME" -v status="$status" \
    '$9 == status && $0 ~ bot { print $7 }' "$TMP_LOG_FILE" \
    | "$SORT" \
    | "$UNIQ" -c \
    | "$SORT" -rn
}

#######################################
# Create the report: a "Success Hits" section (status 200) followed by a
# "Failed Hits" section (status 404), each listing requested paths with
# their hit counts. Any previous report file is overwritten.
# Globals:   ECHO, TMP_REPORT_FILE (read); see _report_hits for the rest
# Outputs:   writes the report to TMP_REPORT_FILE
#######################################
report_out() {
  {
    "$ECHO" "######## Success Hits ########"
    _report_hits 200
    "$ECHO"
    "$ECHO" "######## Failed Hits ########"
    _report_hits 404
  } > "$TMP_REPORT_FILE"
}
#######################################
# Mail the generated report, with the bot name in the subject line.
# Globals:   MAIL, BOT_NAME, EMAIL, TMP_REPORT_FILE (read)
# Outputs:   diagnostics to stderr when the report file is not readable
# Returns:   1 if the report file is not readable, otherwise the exit
#            status of the mail command
#######################################
mail_report() {
  # Without a report there is nothing meaningful to send.
  if [ ! -r "$TMP_REPORT_FILE" ]; then
    printf 'bot_report: report file not readable: %s\n' "$TMP_REPORT_FILE" >&2
    return 1
  fi

  # The report path embeds the bot name, so it must be quoted: a bot name
  # containing spaces would otherwise be split into several words.
  "$MAIL" -s "bot report: ${BOT_NAME}" "$EMAIL" < "$TMP_REPORT_FILE"
}
#
# Main
#
# Runs the four stages in order: extract today's log lines, build the
# report, mail it, then prune old temp files. The result of each stage is
# ignored and the script always finishes with exit status 0.
main() {
  tmp_file_out
  report_out
  mail_report
  tmp_file_clean
}

main
exit 0
- sandip's blog
- Login or register to post comments