#!/bin/sh
# checkdsl: restart DSL connection if 100% ping loss to external servers.
# For use with inittab. (nobodyo<nospam>@</nospam>web.de)
# It uses only ips of access point and (name)servers to avoid the name resolving timeouts because they 
# enlarge the ping timeouts.
#
# DNS servers without ping: 62.96.128.66, 194.25.2.130
# Ok but not used: 194.162.162.194, 193.141.40.42, 195.202.33.68 193.189.224.2 194.183.128.36 212.44.160.8,
# 195.3.96.67 193.193.144.12 145.253.2.75, 134.60.1.111, 62.26.26.62,
# because for big downloads/uploads the ping timeout must be greater than 
# 7 s and four servers are enough and can be checked within a half minute (4*7s=28s). 
# 
# 2007: Added a first deadline with modem/router powercycle (http://www.true-random.com/homepage/projects/8fa/),
#       and a second with a server reboot. At both deadlines a report mail is sent to root.
#       Don't forget to eliminate BIOS passwords and configurations which stop booting!
#       Added the script stopdsl in /etc/crontab to synchronise this script with controlled termination and
#       restart of the DSL connection.
# 2008: Added Macchanger, switched to 5 s timeout because the FritzBox shows a constant increasing ping and
#       with a 10 Mbit fullduplex connection the ping with a DSL 16000 connection is lower with a working connection.
# 2009: Added killing of the pppd via signal 9 for defined termination. This reduced the number of powercycles per
#       week by more than 90 %! Added killing of smpppd-ifcfg because it produces zombies which block the DSL connection.
#

# Trace every executed command.
set -x

# Modem powercycle deadline, counted in failed loops (approx. 50 s/loop).
# It fires whenever <counter> modulo FIRST_DEADLINE equals FIRST_DEADLINE-1,
# so with 5 the deadline is reached at 4, 9, 14, ...
FIRST_DEADLINE=3

# Server reboot deadline, counted in failed loops: the reboot is done
# as soon as <counter> > SECOND_DEADLINE.
SECOND_DEADLINE=34

# Loop interval for "torctl reload"; 2 means reload every second loop.
RELOADLINE=2

# Maximum connection time in seconds (default: 17280 seconds, two metric hours).
MAXIMUM_CONNECTION_TIME=17280

# File that is touched once per loop so a watchdog script/program can check
# that this script is still running.
TOUCHFILE=/var/tmp/checkdsl.touchfile


########## Lockfile Part #####################

# Seconds to sleep between attempts to create the lockfile.
sleeptime=1
# Number of attempts to create the lockfile (default 10; should be
# > locktimeout*sleeptime).
retries=10
# Seconds after which an existing lockfile is removed by force, measured from
# its last modification/creation (default 5). Lockfile is clock skew immune.
locktimeout=5
# Directory that holds the lockfile.
lockdir=/var/tmp

# Name of this process: take our own command line from ps, drop an optional
# leading "bash " interpreter call with sed and keep only the basename.
own_cmdline=$(ps -p $$ -o cmd= | sed 's/^[^ ]*bash //')
this_process=$(basename "$own_cmdline")

# Full path of the lockfile.
lockfile="${lockdir}/lockfile.${this_process}"

# Make sure the external 'lockfile' utility is available before locking.
# Outputs: an error message on stderr when the utility is missing.
# Exits:   with status 1 when 'lockfile' cannot be found in PATH;
#          returns normally otherwise.
check ()
{
  # 'command -v' is the POSIX way to probe for a command; unlike 'which' it
  # is a shell builtin and reports absence through its exit status only.
  if ! command -v lockfile > /dev/null 2>&1; then
    echo "$0 failed: 'lockfile' utility not found in PATH." >&2
    exit 1
  fi
}


# Acquire the single-instance lock of this script.
#
# Globals:  lockfile (path of the lockfile), sleeptime, retries, locktimeout
#           (handed to the external 'lockfile' utility), UID.
# Outputs:  progress messages on stdout, fatal errors on stderr.
# Exits:    with status 1 when a live process of the same user already holds
#           the lock, or when the lockfile could not be created in time.
# Side effects: removes stale lockfiles, kills (signal 9) a lock holder that
#           belongs to a different user, stores our PID in the lockfile and
#           installs a trap that removes the lockfile on HUP/INT/TERM.
lock ()
{
  local pid=0
  local owner_uid=""
  local my_uid="${UID:-$(id -u)}" # UID is bash-only; fall back to id(1)

  # check if a lockfile is present
  if [ -f "$lockfile" ]; then
    # Read the PID as plain text and accept digits only. The lockfile lives
    # in a world-writable directory, so its content must never be evaluated
    # (an integer-typed variable would run it through arithmetic evaluation).
    pid="$(cat "$lockfile")"
    case "$pid" in
      '' | *[!0-9]*) pid=0 ;;
    esac

    if [ "$pid" -eq 0 ]; then
      echo "Could not read a valid PID from the lockfile."
      echo "Trying to remove that lockfile"
      echo "$lockfile"
      echo "."
      rm -f "$lockfile"
    elif kill -0 "$pid" 2> /dev/null; then
      echo "The locking executable with pid $pid appears to be already running."
      # The process name is deliberately not compared (init starts this script
      # via /bin/sh, which makes the name unreliable); only the owner is.
      # ps pads the uid column, so strip the whitespace before comparing.
      owner_uid="$(ps -p "$pid" -o uid= | tr -d '[:space:]')"
      if [ "$owner_uid" = "$my_uid" ]; then
        echo "The locking process has been created from the same user $UID which is running this script; exiting."
        exit 1
      else
        echo "The locking process has been created from the different user"
        echo "$owner_uid"
        echo "; the user (UID) of this script is $UID."
        # Kill the blocking process and take over its lock.
        echo "Try to kill this locking process."
        kill -9 "$pid"
        rm -f "$lockfile"
        echo "Done killing and lockfile deletion."
        # Maybe a mail to root@localhost should be sent here, telling that a
        # user tried (or maybe caused) a DOS attack and that the blocking
        # process was killed.
      fi
    else
      echo "The locking executable with pid $pid has completed or was killed without cleaning up its lockfile"
      echo "or the locking executable has another name than this script or it is run by an other user;"
      echo "removing that lockfile"
      echo "$lockfile"
      echo "."
      rm -f "$lockfile"
    fi
  else
    echo "no lockfile found"
  fi

  # (try to) create the lockfile; the utility sleeps and retries on its own
  if ! lockfile -"$sleeptime" -r "$retries" -l "$locktimeout" "$lockfile" 2> /dev/null; then
    echo "$0: Failed: Couldn't create lockfile in time" >&2
    exit 1
  fi

  # Make the new lockfile writable for us, store our PID in it, then
  # write-protect it again.
  chmod u+rw "$lockfile"
  echo $$ > "$lockfile"
  chmod u-wx "$lockfile"

  # Delete the lockfile when the script gets killed by SIGHUP, SIGINT or
  # SIGTERM. In many cases, e. g. a kernel hangup, this does not work and the
  # checks above are necessary. Also wait some seconds before termination for
  # the Modem Poweroff. Single quotes: the path is expanded (and quoted) when
  # the trap fires, not when it is installed.
  trap 'rm -f "$lockfile"; sleep 5; exit' HUP INT TERM
}


# Release the lock: delete the lockfile named by the global 'lockfile'.
# A lockfile that is already gone is not an error (rm -f).
unlock () {
  rm -f "${lockfile}"
}


#################### "main" ##############################

# Directory this script was started from; the loop counter file is kept there.
dirname="$(dirname "$0")"
counterfilename="checkdsl.counter"

echo "Start of main part at"
# 'xargs echo' squeezes the padding blanks of the date output.
date | xargs echo

# State shared with the endless loop below. Plain assignments are used
# instead of 'typeset -i' so that this part also runs under a POSIX /bin/sh.
flag=0             # 1 = connection ok, 0 = connection down / not checked
i=0                # endless loop cycle counter
i_tmp=0            # tmp; not referenced in the visible part of this script
deadline_counter=0 # consecutive loops without a working connection
time0=$(date +%s)  # start of the current connection period (epoch seconds)
time1=$time0       # last time the connection was seen working

# Runlevel: Checks only at 3 <= runlevel <= 5. Anything else (including an
# empty or non-numeric answer from runlevel) ends the script quietly.
RL=$(/sbin/runlevel | awk '{ print $2 }')
case "$RL" in
  3 | 4 | 5)
    echo "3 <= runlevel <= 5, ok" # ok
    ;;
  *)
    echo "3 <= runlevel <= 5, is not fullfilled; exiting"
    exit 0
    ;;
esac

# lockfile: first check, then set for locking
check
lock

# initialize (create or truncate) the counter file
: > "$dirname/$counterfilename"

# Uptime: right after boot (uptime <= 150 s) the modem is power-cycled and
# the MAC is changed before checking starts.
# Measured uptime at this point: 86 - 124 s
# Only the integer part of the first field of /proc/uptime is used.
UT=$(sed "s#\..*##" /proc/uptime)
# An unreadable uptime counts as 0, i.e. as "just booted".
if [ "${UT:-0}" -gt 150 ]; then
  echo "uptime $UT > 150 s"  # ok
else
  echo "uptime $UT <= 150 s; waiting 10 seconds"
  # Modem power off
  /root/c/8fa/1/modemoff.sh
  # change the MAC (in the background, while the modem is off)
  /etc/sysconfig/network/if-up.d/macchanger2.sh eth1 &
  sleep 5
  # Modem power on
  /root/c/8fa/1/modemon.sh
  sleep 15
fi

# no check when a boot skript is running
#if [ "`ps ax | awk '{print $6}' | egrep "(^|/)boot.local"`" ]; then
#  exit 0
#fi

# wait 10 s for the modem/router or script
sleep 10

#echo "Switching on the modem"
# Modem power on
/root/c/8fa/1/modemon.sh

# Endless loop (for working via inittab). Every round it
#   1. records the loop counter in "$dirname/$counterfilename",
#   2. restarts missing services (sshd, cron, squid, tor, privoxy),
#   3. checks the DSL connection by pinging a list of fixed IPs, or forces a
#      reconnect once MAXIMUM_CONNECTION_TIME is exceeded,
#   4. on failure tears the connection down, escalates to a modem powercycle
#      (FIRST_DEADLINE) or a server reboot (SECOND_DEADLINE) and redials,
#   5. sleeps 30 seconds.

# UID is provided by bash only; fall back to id(1) under other shells.
my_uid=${UID:-$(id -u)}

while true; do

  echo "Start of endless loop"

  # if not checked, the connection is assumed not to be ok
  flag=0
  echo "$i" > "$dirname/$counterfilename"
  i=$(( i + 1 ))

  # Check ssh, dns2go and other net services
  if ! pgrep -U "$my_uid" -x sshd > /dev/null; then
    /usr/sbin/sshd
    echo "sshd restarted"
  fi

  if ! pgrep -U "$my_uid" -x cron > /dev/null; then
    rccron restart
    echo "cron restarted"
  fi

#  if [ -z "`pgrep -U $UID -x apcupsd`" ]; then
#    nice -19 rcapcupsd stop
#    sleep 1
#    killall -15 apcupsd # cleanup; necessary!
#    killall -15 apccontrol
#    killall -15 apcaccess
#    sleep 0.5
#    nice -19 rcapcupsd start
#    echo "apcupsd restarted"
#  fi

  #if [ -z "`pgrep -U $UID -x dns2go`" ];
  #then
  #  nice -n 19 /usr/local/bin/dns2go &
  #  echo "dns2go restarted"
  #fi

#  if [ -z "`pgrep -U $UID -x httpd2-prefork`" ];
#  then
#    nice -n 19 rcapache2 restart &
#    echo "Apache2 restarted"
#  fi

#  if [ -z "`pgrep -U $UID -x proftpd`" ];
#  then
#    /usr/bin/nice -n 19 /usr/local/sbin/proftpd start &
#    echo "Proftpd started"
#  fi

  if ! pgrep -x squid > /dev/null; then
    /usr/sbin/rcsquid start &
    echo "squid started"
  fi

  # reload tor's config and broadcast (every RELOADLINE-th loop)
  if [ $(( i % RELOADLINE )) -eq $(( RELOADLINE - 1 )) ]; then
    /usr/bin/torctl reload
  fi

  if ! pgrep -x tor > /dev/null; then
    # /usr/bin/nice -n 19 /etc/init.d/tor restart &
    /usr/bin/torctl start &
    echo "TOR started"
  fi

  if ! pgrep -x privoxy > /dev/null; then
    # /usr/bin/nice -n 19 /etc/init.d/tor restart &
    /usr/sbin/rcprivoxy start &
    echo "privoxy started"
  fi

  echo "Checking DSL connection"

  # check for maximum connection time
  timed=$(( time1 - time0 ))
  if [ "$timed" -gt "$MAXIMUM_CONNECTION_TIME" ]; then
    # terminate the connection, clear connected flag
    cinternet --interface-name=dsl0 -A --verbose --stop
    killall -15 pppd > /dev/null 2>&1
    killall -15 smpppd-ifcfg > /dev/null 2>&1
    killall -15 cinternet > /dev/null 2>&1
    sleep 3
    # pppd gets signal 9 for a defined termination
    killall -9 pppd > /dev/null 2>&1

    # change the MAC
    /etc/sysconfig/network/if-up.d/macchanger2.sh eth1

    # restart squid and tor
    /usr/sbin/rcsquid restart &
    /usr/bin/torctl restart &

    # do a powercycle of the fritzbox  because the fritzbox 7*** does need a reboot after about 6 h connection
    #cd /root/c/8fa/1/
    #/bin/bash ./modem_powercycle.sh
    #sleep 10

    # flag stays 0, so the reconnect branch below redials the connection
    flag=0
  else
    # Check DSL connection with pings to the access point (DSLAM) and/or DNS servers.
    # Stop checking after first successfull ping.
    # Ping: don't fragment,  route with minimal cost, quiet, 5 s deadline and timeout (more does not make sense; the
    # connection is dead or nearly dead when a ping needs more than 5 s because usually it's about
    # 100 ms), 1 packet, net size 24, pattern 0x0f1e2d3c4b5a6978, no DNS lookup. Only 24 byte data (16 byte pattern)
    # for measuring the round trip time.
    for HOST in 82.135.16.28 212.114.214.8 212.114.152.1 194.25.2.129 193.189.244.197; do
      if ping -M do -Q 0x02 -q -w 5 -W 5 -c 1 -s 24 -p 0f1e2d3c4b5a6978 -n "$HOST" > /dev/null 2>&1; then
        flag=1
        break
      fi
    done
  fi

  # sign of life for the watchdog
  touch "$TOUCHFILE" &

  # if no connection: restart connection, increase deadline_counter, init for maximum connection time
  if [ "$flag" -eq 0 ]; then
    deadline_counter=$(( deadline_counter + 1 ))
    time0=$(date +%s)
    time1=$time0

    # terminate the connection and do a fast start or reboot afterwards
    #  cinternet --verbose --hangup
    cinternet --interface-name=dsl0 -A --verbose --stop &
    killall -15 pppd > /dev/null 2>&1
    sleep 3
    killall -9 pppd > /dev/null 2>&1
    # nohup /bin/terminate smpppd &>/dev/null &

    # server reboot at second deadline: Mail, shutdown after reboot in background for a deadline.
    if [ "$deadline_counter" -gt "$SECOND_DEADLINE" ]; then
      mail -s "Second deadline reached: Rebooting." root@localhost < /dev/null
      #init 6&
      reboot &
      sleep 30
      # fallback in case the reboot above did not take the machine down
      shutdown -r -t 30 now
    fi

    # modem powercycle at first deadline(s), Mail
    if [ $(( deadline_counter % FIRST_DEADLINE )) -eq $(( FIRST_DEADLINE - 1 )) ]; then
      /etc/sysconfig/network/if-up.d/macchanger2.sh eth1 &
      # The helper scripts are started from their own directory. The subshell
      # keeps the working directory of this script unchanged, so a relative
      # $dirname (counter file) stays valid in later rounds.
      ( cd /root/c/8fa/1/ && /bin/bash ./modem_powercycle.sh )
      sleep 10
      mail -s "First deadline number reached at counter $deadline_counter: Modem powercycle done." root@localhost < /dev/null
    else
      #echo "Switching on the modem"
      # Modem power on
      ( cd /root/c/8fa/1/ && /bin/bash ./modemon.sh )
    fi

    # check smpppd
    #if [ -z "`pgrep -U $UID -x smpppd`" ];
    #then
    #  /usr/sbin/smpppd
    #  echo "smpppd restarted"
    #fi
    rcsmpppd restart

    # restart connection
    # next line for internal modem
    # /usr/sbin/rcsmpppd restart

    # next line for external modem
    cinternet --interface-name=dsl0 -A --verbose --dialin

    # wait 30 seconds for connection
    sleep 30

    echo "Restarting most internet services at"
    date | xargs echo

    # restart tor via inittab
    #/etc/init.d/tor stop

    # restart apache2 because of some buggy modules
#    nice -n 19 rcapache2 restart

    # restart proftpd
    #/usr/bin/nice -n 19 /usr/local/sbin/proftpd restart

    # wait for restarting via inittab
    # sleep 1

    # nohup /bin/terminate checkdsl.sh &>/dev/null &
    # cinternet --verbose --dialin
    # /usr/sbin/pppd logfd 17 call pppoe eth1 mtu 1492 mru 1492 ipcp-accept-local ipcp-accept-remote idle 999999 defaultroute replacedefaultroute nodetach usepeerdns user a020133 passwordd 20
    # /usr/sbin/pppd logfd 7 call pppoe eth1 mtu 1492 mru 1492 192.168.99.1:192.168.99.99 ipcp-accept-local ipcp-accept-remote demand idle 300 defaultroute replacedefaultroute nodetach usepeerdns user a902013 passwordd 8
    # /usr/sbin/rcsmpppd restart
    # echo 1 # for testing

    killall dns2go &
    killall inadyn &

    /bin/bash /root/bin/renicenames.sh pppd -5 &
    /bin/bash /root/bin/renicenames.sh httpd2-prefork 19 &
    #    /bin/bash /root/bin/renicenames.sh proftpd 19 &
    #/bin/bash /root/bin/renicenames.sh tor 19 &
  else
    # connection is still up
    deadline_counter=0
    time1=$(date +%s)
    # ping to the modem
    #ping -M do -Q 0x02 -q -w 7 -c 1 -s 24 -p 0f1e2d3c4b5a6978 -n 192.168.178.1 &
  fi

  echo "Waiting some seconds before next round"

  # wait 30 seconds before next round
  sleep 30

done

# Not reached as long as the loop above has no exit condition; kept so the
# lock is released should one ever be added.
unlock

exit 0

