../tarefls

TaRefLs

Introduction

A couple of months ago, I was wondering whether there was an easy way to get readable information about who is referring to my website without using huge GUI tools or the like. Shortly afterwards I wrote a script to do just that. Overall it is nothing spectacular… but IMHO very useful if you are as nosy as I am. =)

Description

This Perl script produces formatted and sorted output of Apache's referers. It works on both Apache 1 and Apache 2 log files.

You will of course have to change some paths to match your system. Written and tested on Debian GNU/Linux 3.1.

Source

#!/usr/bin/perl

# ./tarefls.pl
# Version 1.0

#Jul 17, 2006 Thomas
#Nicer, better organized (more human-readable) output
#of Apache's referers

#Copyright (C) 2006 Thomas
#
#This program is free software; you can redistribute it and/or
#modify it under the terms of the GNU General Public License
#as published by the Free Software Foundation; either version 2
#of the License, or any later version.
#
#This program is distributed in the hope that it will be useful,
#but WITHOUT ANY WARRANTY; without even the implied warranty of
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#GNU General Public License for more details.
#
#You should have received a copy of the GNU General Public License
#along with this program; if not, write to the Free Software
#Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA

#use PerlIO::gzip;
use IO::Compress::Gzip qw(gzip $GzipError);
use strict;
use Getopt::Long;

# ---- file-level state and option parsing ----

# Parsed log data, keyed by referring URL. Each value is a hash holding
# the keys "count", "requestedfile" and "date".
my %data;

# Values of the --show and --sort command-line options (undef when the
# option was not given).
my ($show, $sort);

# File-level scalar; nothing in the code shown here reads it.
my $requestedfile;

# Log files to process; filled in once the options have been checked.
my @files;

# Three-letter month abbreviation => month number (kept as strings).
my %month;
@month{qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec)}
    = qw(1 2 3 4 5 6 7 8 9 10 11 12);

GetOptions(
    'show=s' => \$show,
    'sort=s' => \$sort,
);

# Print the one-line command synopsis: the script name ($0) followed by the
# accepted --show and --sort values. Output goes to the currently selected
# output handle.
sub usage {
    my $synopsis = "  [--show=all|mostrecent] [--sort=date|count|url]\n";
    my $line     = $0 . $synopsis;
    print $line;
}

# ---- main program ----

# Both --show and --sort are mandatory: print the usage line and exit with
# status 1 when either one is missing (or empty).
unless ($show && $sort) {
    usage();
    exit 1;
}

# Path of the current Apache access log; adjust it to your installation.
# NOTE(review): the file patterns that originally followed "@files =" were
# lost from the published listing (they looked like markup and were
# stripped). The Debian apache2 default used here is an assumption - confirm.
# The path must not contain whitespace, because glob() splits its pattern
# on whitespace.
my $logfile = '/var/log/apache2/access.log';

# Accepted --show values.
my %is_valid_show = map { $_ => 1 } qw(all mostrecent);

# Accepted --sort values and the routine that prints the report for each.
my %printer_for = (
    date  => \&sortbydate_and_print,
    count => \&sortbycount_and_print,
    url   => \&sortbyurl_and_print,
);

# Any other option value: print the usage line and exit with status 2.
unless ($is_valid_show{$show} && exists $printer_for{$sort}) {
    usage();
    exit 2;
}

# "all" takes the current log plus every rotated copy next to it
# (access.log, access.log.1, ...); "mostrecent" takes the current log only.
@files = $show eq 'all' ? glob("$logfile*") : ($logfile);

getfiles();
$printer_for{$sort}->();

# Parse one access-log line and record its referer in %data.
#
# The line is taken from the first argument when one is given, otherwise
# from $_ (calling extract_and_store() with no arguments works as before).
#
# Only lines matching the pattern below are recorded: an IPv4-looking
# address, a bracketed "[day/Mon/year:hour:min..." timestamp, a GET request,
# and a quoted field starting with "http" that is followed by another quoted
# field. For every such line, keyed by that quoted URL:
#   $data{$url}{count}          is incremented,
#   $data{$url}{requestedfile}  is overwritten with the requested path,
#   $data{$url}{date}           is set to "year.month.day hour:min" only if
#                               no date is stored for that URL yet.
# Lines that do not match are ignored silently.
sub extract_and_store {
    my $line = @_ ? $_[0] : $_;
    return unless defined $line;

    # Skip requests whose quoted URL contains 80.78.242.253 or "peek-a-boo"
    # (presumably the author's own site - adjust for your host).
    # A plain return replaces the former "next", which only worked by
    # reaching into the caller's loop and died when there was none.
    return if $line =~ /GET.+\"(http.+(?:80\.78\.242\.253|peek-a-boo).+)\"\s\"/;

    # The list assignment yields the number of captures, so a failed match
    # (empty list) takes the "or return" branch.
    my ($day, $mon, $year, $hour, $min, $request, $url) =
        $line =~ /\d+\.\d+\.\d+\.\d+.+\[(\d*)\/(\w*)\/(\d*)\:(\d*)\:(\d*).+\].+GET\s(.+)\sHTTP.+\"(http.+)\"\s\"/
        or return;

    $data{$url}{"count"}++;
    $data{$url}{"requestedfile"} = $request;

    # Keep the date of the first line seen for this URL.
    if (!$data{$url}{date}) {
        $data{$url}{date} = "$year.$month{$mon}.$day $hour:$min";
    }

    return;
}

# Read every file listed in @files line by line and hand each line to
# extract_and_store via $_.
#
# Files whose name ends in ".gz" are read through IO::Uncompress::Gunzip;
# all other files are opened as plain text. Dies with the file name and the
# reason when a file cannot be opened.
sub getfiles {
    for my $file (@files) {
        my $fh;

        if ($file =~ /\.gz\z/) {
            # Loaded on demand so that plain-text runs do not need it.
            require IO::Uncompress::Gunzip;
            $fh = IO::Uncompress::Gunzip->new($file, MultiStream => 1)
                or die "Cannot gunzip $file: $IO::Uncompress::Gunzip::GunzipError\n";
        }
        else {
            open $fh, '<', $file
                or die "Cannot open $file: $!\n";
        }

        # Protect the caller's $_ while the lines are read into it.
        local $_;
        while (<$fh>) {
            extract_and_store();
        }

        close $fh;
    }

    return;
}

# Print one two-line report entry per referring URL (the keys of %data),
# ordered chronologically by the stored date, oldest first.
#
# Stored dates look like "year.month.day hour:min" with an unpadded month,
# so comparing them as plain strings would put "2006.10..." before
# "2006.7...". Each date is therefore turned into a fixed-width, zero-padded
# key before sorting. Entries whose date cannot be parsed sort first, and
# entries with equal dates are ordered by URL so the output is stable.
sub sortbydate_and_print {
    my %sortkey_for;
    for my $url (keys %data) {
        my $date  = defined $data{$url}{date} ? $data{$url}{date} : '';
        my @parts = $date =~ /\A(\d+)\.(\d+)\.(\d+) (\d+):(\d+)\z/;
        $sortkey_for{$url} = @parts ? sprintf('%04d%02d%02d%02d%02d', @parts) : '';
    }

    my @ordered = sort {
        $sortkey_for{$a} cmp $sortkey_for{$b} or $a cmp $b
    } keys %data;

    for my $url (@ordered) {
        my $entry = $data{$url};
        print "$entry->{date} | count: $entry->{count} | $url\n Requested Location: $entry->{requestedfile}\n";
    }

    return;
}
# Print one two-line report entry per referring URL (the keys of %data),
# ordered by hit count, lowest count first.
sub sortbycount_and_print {
    my @by_count = sort { $data{$a}{count} <=> $data{$b}{count} } keys %data;

    for my $url (@by_count) {
        my $entry = $data{$url};
        print "$entry->{date} | count: $entry->{count} | $url\n Requested Location: $entry->{requestedfile}\n";
    }
}
# Print one two-line report entry per referring URL (the keys of %data),
# ordered by URL using plain string comparison.
sub sortbyurl_and_print {
    my @by_url = sort { $a cmp $b } keys %data;

    for my $url (@by_url) {
        my $entry = $data{$url};
        print "$entry->{date} | count: $entry->{count} | $url\n Requested Location: $entry->{requestedfile}\n";
    }
}