#!/bin/sh
# checkdsl.sh: restart DSL connection if 100% ping loss to external servers.
# For use with inittab. (nobodyo<nospam>@</nospam>web.de)
# It uses only the IP addresses of the access point and of (name)servers, because name
# resolution timeouts would lengthen the ping timeouts.
#
# DNS servers without ping: 62.96.128.66, 194.25.2.130
# Ok but not used: 194.162.162.194, 193.141.40.42, 195.202.33.68 193.189.224.2 194.183.128.36 212.44.160.8,
# 195.3.96.67 193.193.144.12 145.253.2.75, 134.60.1.111, 62.26.26.62,
# because for big downloads/uploads the ping timeout must be greater than 
# 7 s and four servers are enough and can be checked within a half minute (4*7s=28s). 
# 
# 2007: Added a first deadline with modem/router powercycle (http://www.true-random.com/homepage/projects/8fa/), 
#       and a second with a server reboot. At both deadlines a reporting mail is sent to root.
#       Don't forget to eliminate BIOS passwords and configurations which stop booting!
#       Added the script stopdsl in /etc/crontab to synchronise this script with controlled termination and
#       restart of the DSL connection.
# 2008: Added Macchanger, switched to 5 s timeout because the FritzBox shows a constant increasing ping and
#       with a 10 Mbit fullduplex connection the ping with a DSL 16000 connection is lower with a working connection.
# 2009: Added killing of the pppd via signal 9 for defined termination. This reduced the number of powercycles per
#       week by more than 90 %!
#
# 2010-03: Switch from SuSE to Debian. Added infix "." and suffix "sh".
#       Switch from a maximum uptime of 1/5 day to 1/2 day with a drag to 0:00 and 12:00.
#       Todos: A function for checking and restarting several important net services,
#       to reduce redundancy, Parameter for the device, where the modem is connected.

# Trace every command as it runs (comment this out for less verbosity).
set -x

# Number of failed loops (approx. 50 s/loop) that triggers a modem powercycle.
# The powercycle happens whenever <counter> modulo FIRST_DEADLINE equals
# FIRST_DEADLINE-1; with 5 that would be at 4, 9, 14, ...
FIRST_DEADLINE=3
# Number of failed loops for a server reboot: done at <counter> > SECOND_DEADLINE.
SECOND_DEADLINE=34

# Loop interval for the tor reload; 2 means a reload every second loop.
RELOADLINE=2

# Maximum connection time in seconds.
MAXIMUM_CONNECTION_TIME=43200

# File touched in every loop so a watchdog script/program can check on us.
TOUCHFILE=/var/tmp/checkdsl.touchfile


########## Lockfile Part #####################

# Sleep time used while creating the lockfile.
sleeptime=1
# Number of retries for creating the lockfile (default 10);
# should be > locktimeout*sleeptime.
retries=10
# Lock timeout in seconds (default 5): the lockfile is removed by force once
# this many seconds have passed since it was last modified/created.
# Lockfile is clock skew immune.
locktimeout=5
# Directory holding the lock file.
lockdir=/var/tmp
# Name of this process: take the command line from ps, strip an optional
# leading bash call with sed and keep only the basename.
this_process=$(basename "$(ps -p "$$" -o cmd= | sed -e 's/^[^ ]*bash //')")
# Full name of the lock file.
lockfile="${lockdir}/lockfile.${this_process}"

# Ascertain that the 'lockfile' utility can be found.
# Prints a message to stderr and exits the script with status 1 if it cannot.
check ()
{
  # 'command -v' is the portable lookup; unlike 'which' its result does not
  # depend on the output format of an external program.
  if ! command -v lockfile > /dev/null 2>&1; then
    echo "$0 failed: 'lockfile' utility not found in PATH." >&2
    exit 1
  fi
}


# Create the lockfile for this process, cleaning up an old lockfile first.
#
# Reads the globals lockfile, sleeptime, retries and locktimeout.
# - An old lockfile without a valid PID, or with the PID of a process that is
#   no longer running, is removed.
# - If the PID belongs to a running process of the same user, the script
#   exits with status 1.
# - If it belongs to a running process of a different user, that process is
#   killed and the lockfile removed.
# Afterwards the lockfile is created with the 'lockfile' utility (exit 1 on
# failure), our PID is stored in it and a trap for HUP/INT/TERM removes it.
lock ()
{
  local pid owner_uid my_uid

  # check if a lockfile is present
  if [ -f "$lockfile" ]; then
    # Read the PID from the lockfile. Only a plain string of digits is
    # accepted: the file lives in a shared directory, and letting the shell
    # evaluate its content as an integer expression would run any command
    # substitution embedded in it (and abort on content like "12 34").
    pid="$(cat "$lockfile")"
    case "$pid" in
      '' | *[!0-9]*) pid=0 ;;
    esac

    if [ "$pid" -eq 0 ]; then
      echo "Could not read a valid PID from the lockfile."
      echo "Trying to remove that lockfile"
      echo "$lockfile"
      echo "."
      rm -f "$lockfile"
    elif kill -0 "$pid" 2> /dev/null; then
      echo "The locking executable with pid $pid appears to be already running."
      # The author disabled an additional check here that compared the name of
      # the locking process with the name of this script; only the owner of
      # the locking process is checked.
      my_uid="${UID:-$(id -u)}"
      # ps pads the uid with blanks; strip them before comparing.
      owner_uid="$(ps -p "$pid" -o uid= | tr -d '[:space:]')"
      if [ "$owner_uid" = "$my_uid" ]; then
        echo "The locking process has been created from the same user $my_uid which is running this script; exiting."
        exit 1
      elif [ -z "$owner_uid" ]; then
        # ps reported no owner (e.g. the process ended in the meantime):
        # nothing may be killed on that basis, just drop the lockfile.
        echo "Could not determine the user of the locking process; removing that lockfile"
        echo "$lockfile"
        echo "."
        rm -f "$lockfile"
      else
        echo "The locking process has been created from the different user"
        echo "$owner_uid"
        echo "; the user (UID) of this script is $my_uid."
        # NOTE(review): any user able to write the lockfile can make this
        # script kill a process of a third user. Comment out the kill below
        # if that is not wanted.
        echo "Try to kill this locking process."
        kill -9 "$pid"
        rm -f "$lockfile"
        echo "Done killing and lockfile deletion."
        # Maybe an email should be sent to root@localhost here, telling that a
        # user tried (or maybe caused) a DOS attack and that the blocking
        # process was killed.
      fi
    else
      echo "The locking executable with pid $pid has completed or was killed without cleaning up its lockfile"
      echo "or the locking executable has another name than this script or it is run by an other user;"
      echo "removing that lockfile"
      echo "$lockfile"
      echo "."
      rm -f "$lockfile"
    fi
  else
    echo "no old lock file found"
  fi

  # (try to) create the lockfile; the utility waits and retries by itself
  if ! lockfile "-$sleeptime" -r "$retries" -l "$locktimeout" "$lockfile" 2> /dev/null; then
    echo "$0: Failed: Couldn't create lockfile in time" >&2
    exit 1
  fi
  # make the lockfile writable, store the pid, write-protect it again
  chmod u+rw "$lockfile"
  echo "$$" > "$lockfile"
  chmod u-wx "$lockfile"
  # A trap to delete the lockfile when the script gets killed by SIGHUP, SIGINT or SIGTERM.
  # In many cases, e. g. a kernel hangup, this does not work and the checks above are necessary.
  # Also wait some seconds before termination for the Modem Poweroff.
  # Single quotes: $lockfile is expanded (and quoted) when the trap fires.
  trap 'rm -f "$lockfile"; sleep 5; exit' HUP INT TERM
}


# Release the lock: delete the lockfile (a missing file is not an error).
unlock ()
{
  rm -f "${lockfile}"
}


#################### "main" ##############################

# Restart a service in the background when pgrep finds no process for it.
#   $1     name of the init script below /etc/init.d
#   $2     message to print after the restart was triggered
#   $3...  arguments handed to pgrep to look for the process
restart_if_missing ()
{
  local initscript="$1" message="$2"
  shift 2
  if ! pgrep "$@" > /dev/null; then
    "/etc/init.d/$initscript" restart &
    echo "$message"
  fi
}

# Directory of this script, used for the counter file. It is resolved to an
# absolute path because the loop below changes the working directory
# (cd /root/bin/); a relative path would point to another place afterwards.
dirname="$(CDPATH='' cd "$(dirname "$0")" && pwd)" || dirname="$(dirname "$0")"

# log file for the loop counter
counterfilename="checkdsl.counter"

echo "Start of main part at"
date | xargs echo

# variables
# flag for connection ok
flag=0
# endless loop cycle counter
i=0
# counter of consecutive loops without connection
deadline_counter=0
# user id of this script, for the per-user process checks
my_uid="${UID:-$(id -u)}"

# init for maximum connection time: time0 = start of the connection,
# time1 = time of the last successful check
time0=$(date +%s)
time1=$time0

# delay to avoid too fast respawning
sleep 1

# Runlevel: Checks only at 2 <= runlevel <= 5 and assure this
# e. g. by /etc/inittab and the default runlevel.
RL=$(/sbin/runlevel | awk '{ print $2 }')
case "$RL" in
  [2-5])
    echo "2 <= runlevel <= 5, ok" # ok
    ;;
  *)
    # also reached when the runlevel is empty or not a number
    echo "2 <= runlevel <= 5, is not fullfilled; exiting"
    exit 0
    ;;
esac

# lockfile: first check, then lock
check
lock

# Modem power on
/root/bin/modemon.sh

# initialize (empty) the counter file
: > "$dirname/$counterfilename"

# Uptime: Checks only min. 60 s after boot.
# Measured uptime at this point: 86 - 124 s
#UT=$(awk '{ print $1 }' /proc/uptime)
# whole seconds of uptime: everything from the first dot on is cut off
UT=$(sed "s#\..*##" /proc/uptime)
if [ "$UT" -gt 150 ]; then
  echo "uptime $UT > 150 s"  # ok
else
  echo "uptime $UT <= 150 s; waiting 15 seconds"
  # Modem power off
#  /root/bin/modemoff.sh
#  # change the MAC
#  /etc/network/if-up.d/macchanger2.sh eth1 &
#  sleep 5
#  # Modem power on
#  /root/bin/modemon.sh
  sleep 15
fi

# wait 10 s for ppp connection
sleep 10

# endless loop (for working via inittab)
while true; do

  # if not checked, the connection is assumed not to be ok
  flag=0

  # check for maximum connection time
  timed=$(( time1 - time0 ))
  if [ "$timed" -gt "$MAXIMUM_CONNECTION_TIME" ]; then
    # NOTE(review): flag stays 0 in this branch, so the "no connection" branch
    # below runs as well: it resets time0 but also reconnects a second time
    # and increases deadline_counter. Confirm that this is intended.
    # terminate the connection
    poff
    # change the MAC
    /etc/network/if-up.d/macchanger2.sh eth1
    # start a new connection
    pon dsl-provider
    # restart squid and tor
    /etc/init.d/tor restart &
    /etc/init.d/squid restart &
    # wait for new connection
    sleep 10
  else
    # Check DSL connection with pings to the access point (DSLAM) and/or DNS servers.
    # Stop checking after first successful ping.
    # Ping: don't fragment, route with minimal cost, quiet, 5 s deadline and timeout (more does not
    # make sense; the connection is dead or nearly dead when a ping needs more than 5 s because
    # usually it's about 100 ms), 1 packet, net size 24, pattern 0x0f1e2d3c4b5a6978, no DNS lookup.
    for HOST in 82.135.16.28 212.114.214.8 212.114.152.1 194.25.2.129 193.189.244.197; do
      if ping -M do -Q 0x02 -q -w 5 -W 5 -c 1 -s 24 -p 0f1e2d3c4b5a6978 -n "$HOST" > /dev/null 2>&1; then
        flag=1
        break
      fi
    done
  fi

  # if no connection: restart connection, increase deadline_counter, init for maximum connection time
  if [ "$flag" -eq 0 ]; then
    deadline_counter=$(( deadline_counter + 1 ))
    time0=$(date +%s)
    time1=$time0

    # terminate the connection and do a fast start or reboot afterwards
    poff

    # server reboot at second deadline: Mail, then several ways of rebooting
    # one after another in case the previous one did not succeed.
    if [ "$deadline_counter" -gt "$SECOND_DEADLINE" ]; then
      mail -s "Second deadline reached: Rebooting." root@localhost </dev/null
      reboot &
      sleep 30
      shutdown -r -t 30 now &
      sleep 30
      init 6
    fi

    # modem powercycle at first deadline(s), Mail
    if [ $(( deadline_counter % FIRST_DEADLINE )) -eq $(( FIRST_DEADLINE - 1 )) ]; then
      /etc/network/if-up.d/macchanger2.sh eth1 &
      # the modem scripts are started from within their own directory
      cd /root/bin/ && /bin/bash ./modem_powercycle.sh
      sleep 10
      mail -s "First deadline number reached at counter $deadline_counter: Modem powercycle done." root@localhost </dev/null
    else
      #echo "Switching on the modem"
      # Modem power on
      cd /root/bin/ && /bin/bash ./modemon.sh
    fi
    # start new connection
    pon dsl-provider
    # wait for new connection
    sleep 10
  else # connection is still up
    deadline_counter=0
    time1=$(date +%s)
  fi

  # logging: store the loop counter and touch the file for the watchdog
  echo "$i" > "$dirname/$counterfilename"
  i=$(( i + 1 ))
  touch "$TOUCHFILE" &

  # check several net services ######################################
  # check if "/usr/sbin/pppd call dsl-provider" is running
  if ! pgrep -x pppd > /dev/null; then
    poff
    pon dsl-provider
  fi

  restart_if_missing tor "TOR restarted" -x tor
  restart_if_missing ssh "sshd restarted" -U "$my_uid" -x sshd
  restart_if_missing cron "cron restarted" -U "$my_uid" -x cron
  restart_if_missing squid "squid restarted" -x squid
  restart_if_missing privoxy "privoxy started" -x privoxy

  # wait 30 seconds before next round ###############################
  sleep 30

  # print the date for better logging
  date

  # reload tor's config for broadcasting
  if [ $(( i % RELOADLINE )) -eq $(( RELOADLINE - 1 )) ]; then
    /etc/init.d/tor reload &
  fi

done

# Not reached while the loop above is endless; kept for a clean end in case
# the loop ever gets an exit condition.
unlock

exit 0

