#!/bin/bash
# clean1.sh: Simple in-place cleanup of an input list ($1, the only argument of
# this script): fixes line ends, trims whitespace, strips http://, sorts, dedups.
# Dr. Rolf Freitag. License: Hacktivismo Enhanced-Source Software License Agreement (HESSLA),
# see http://www.hacktivismo.com/about/hessla.php
# Version 1.1, 2009-10-04

# Usage: clean1.sh LISTFILE
#
# Cleans LISTFILE in place: converts line ends with dos2unix, trims leading and
# trailing spaces/tabs from every line, removes a leading "http://", then sorts
# the lines and drops duplicates.
#
# Exit status: 0 on success; 255 if the argument count is wrong or LISTFILE is
# not a readable and writable regular file; otherwise the status of the step
# that failed. If a filtering step fails, the list is not replaced.

# be verbose: trace every command to stderr
set -x

# Stop at the first failing command, unset variable or failed pipeline stage,
# so that a broken step can never replace the list with partial output.
set -euo pipefail

if [ "$#" -ne 1 ]; then
  echo "Error: Not one parameter (the input and output file); exiting!" >&2
  # 255 is the status bash reports for the invalid "exit -1".
  exit 255
fi

list_file=$1

# Refuse anything we cannot read and rewrite; without this check a mistyped
# name would end up creating a new, empty list file.
if [ ! -f "$list_file" ] || [ ! -r "$list_file" ] || [ ! -w "$list_file" ]; then
  printf 'Error: %s is not a readable and writable regular file; exiting!\n' \
    "$list_file" >&2
  exit 255
fi

# tmpfile: unpredictable name from mktemp, removed on every exit path
tmpfile1=$(mktemp)
trap 'rm -f -- "$tmpfile1"' EXIT

# assure correct line ends ("--" protects file names starting with a dash)
dos2unix -- "$list_file"

# A literal space and tab; $'...' makes bash insert the tab itself, so the
# pattern does not depend on sed understanding "\t" inside brackets.
blank=$'[ \t]'

# One pass over the list:
#  1. delete both leading and trailing whitespace(s) from each line
#  2. delete http:// at the beginning of each line (anchored with ^, so an
#     http:// further inside a line is left alone)
#  3. sort and make each line unique by deleting duplicate lines
sed -e "s/^${blank}*//" -e "s/${blank}*\$//" -e 's#^http://##' -- "$list_file" \
  | sort \
  | uniq > "$tmpfile1"

# Copy the result back instead of moving the temp file over the list: the list
# keeps its inode, permissions and ownership, and a symlinked list stays a
# symlink.
cat -- "$tmpfile1" > "$list_file"

