#!/bin/bash
# clean2.sh: Simple script that extracts the domain names from an input list, in place.
# The single argument ($1) is the list file; it is both read and overwritten, and ends up
# sorted with duplicate lines removed. Run clean1.sh on the list first.
# Dr. Rolf Freitag. License: Hacktivismo Enhanced-Source Software License Agreement (HESSLA),
# see http://www.hacktivismo.com/about/hessla.php
# Version 1.1, 2009-10-04

# be verbose: trace every command as it is executed
set -x
# let a failing sed or sort fail the whole extraction pipeline below
set -o pipefail

# Exactly one argument is required: the list file, which is both the input
# and the output of this script.
if [ "$#" -ne 1 ]; then
  echo "Error: Not one parameter (the input and output file); exiting!" >&2
  # 255 is the status bash reported for the former "exit -1"
  exit 255
fi

list=$1

# A missing or unreadable list must never be replaced by an empty file, so
# check it before anything is written.
if [ ! -f "$list" ] || [ ! -r "$list" ] || [ ! -w "$list" ]; then
  echo "Error: '$list' is not a readable and writable regular file; exiting!" >&2
  exit 1
fi

# tmpfile: mktemp picks an unpredictable name and creates the file safely;
# the trap removes it again when the script exits, on success or on failure.
tmpfile2=$(mktemp "${TMPDIR:-/tmp}/clean2.XXXXXX") || exit 1
trap 'rm -f -- "$tmpfile2"' EXIT

# delete everything from the first "/" onwards (e.g. to extract the domain
# name), then sort and make each line unique by deleting duplicate lines.
# The result is staged in the tmpfile so the list is untouched on failure.
if ! sed -e 's#/.*$##' -- "$list" | sort | uniq > "$tmpfile2"; then
  echo "Error: could not process '$list'; it was left unchanged; exiting!" >&2
  exit 1
fi

# Copy the result back instead of moving the tmpfile over the list, so that
# the list keeps its inode, owner and permissions.
if ! cat -- "$tmpfile2" > "$list"; then
  echo "Error: could not write the result back to '$list'; exiting!" >&2
  exit 1
fi

