#!/usr/bin/perl
#
# $Id: weblog,v 1.4 2007/03/14 19:53:46 cos Exp cos $
#
# Pretty-printer for Apache Combined style web logfiles
#   first argument:  hostname of the local site, for less verbose local referrers
#   second argument: dotted quad of your own IP address, to suppress its entries
#
# Code has a bunch of special text-munging rules for LiveJournal, Google, etc.
# Take a look and add your own.
#
# To watch a live logfile:  tail -f /web/logs/access.log | weblog mydomain.com

use strict;
use warnings;

use Socket 'AF_INET';

my $debug = 0;

# set $format to 'csv' for CSV output
my $format = 'text';

# Unbuffered output, so entries show up immediately when fed from "tail -f".
$| = 1;

# Per-run DNS reverse lookup cache: dotted-quad IP => display name.
my %hname;

# formatted for 132 column display - does not apply to CSV format output
my $hwidth = 36;
my $fwidth = 20;
my $rwidth = 42;

# Both arguments are optional; anything left in @ARGV afterwards is read by <>.
my $localsite = shift(@ARGV) || 'cos.polyamory.org';
my $localip   = shift(@ARGV) || '66.30.25.165';

my %mnum = (
    'Jan' => '01', 'Feb' => '02', 'Mar' => '03', 'Apr' => '04',
    'May' => '05', 'Jun' => '06', 'Jul' => '07', 'Aug' => '08',
    'Sep' => '09', 'Oct' => '10', 'Nov' => '11', 'Dec' => '12',
);

# One Apache "combined" log record.  Captures, in order:
#   ip, day of month, month name, year, HH:MM:SS, requested path, referrer
# Literal spaces are written as [ ] because the pattern is compiled with /x.
my $log_entry_re = qr{
    ^(\d+\.\d+\.\d+\.\d+)[ ]\S+[ ]\S+[ ]
    \[(\d+)/(\w+)/(\d+):(\d\d:\d\d:\d\d)
    .*?(\S+)[ ]*HTTP/..."[ ]
    \S+[ ]\S+[ ]
    "([^"]+)"
}x;

LINE:
while (my $line = <>) {
    my ($ip, $mday, $mon, $year, $time, $file, $ref) = $line =~ $log_entry_re;
    unless (defined $ip) {
        warn "BAD LOG ENTRY: $line";
        next LINE;
    }

    # Skip page furniture (icons, thumbnails) and our own hits.
    next LINE if $file =~ m{favicon\.ico};
    next LINE if $file =~ m{/I/.*\.gif};
    next LINE if $file =~ m{/P/.*\.gif};
    next LINE if $file =~ m{/P.*/t\w+\.jpg};
    next LINE if $ip eq $localip;

    my $month = $mnum{$mon} || '??';
    my $date  = "$year$month$mday-$time";
    my $host  = hostlookup($ip);

    if ($format eq 'text') {
        # Keep the rightmost part of long host names (the domain end).
        $host = substr($host, -$hwidth) if length($host) > $hwidth;

        $ref = abbreviate_referrer($ref);
        $file =~ s{/imghost/}{I:};

        # Over-long referrer: cut to the column width, ending in '#>'.
        if (length($ref) > $rwidth) {
            substr($ref, $rwidth - 2) = '#>';
        }

        # Over-long path: cut to the column width, keeping the last three
        # characters (usually the extension) after a '*' marker.
        if (length($file) > $fwidth) {
            my $tail = substr($file, -3);
            substr($file, $fwidth - 4) = "*$tail";
        }

        printf "%s %${hwidth}s %-${fwidth}s %-${rwidth}s\n",
            $date, $host, $file, $ref;
    }
    elsif ($format eq 'csv') {
        print join(',', $date, $ip, $host, csv_quote($file), csv_quote($ref)),
            "\n";
    }
}

# abbreviate_referrer($ref)
#
# Returns a shortened form of a referrer URL for the text display: the
# scheme is dropped, the local site collapses to a bare path, and a few
# well-known sites and search engines are reduced to a short tag (for
# searches, the tag is followed by the query parameter when one is found).
# Rules are applied in order; later rules see the output of earlier ones.
sub abbreviate_referrer {
    my ($ref) = @_;

    for ($ref) {    # alias $_ to $ref so the rules below stay compact
        s{https?://}{};
        s{(?:www\.)?\Q$localsite\E/}{/};
        s{(?:www\.)?livejournal\.com/}{LJ:/};
        s{(\w+)\.livejournal\.com/}{LJ:$1/};
        s{(?:www\.)?bluemassgroup\.(?:typepad\.)?(?:com|org)/}{BMG:};
        s{images\.google\.([\w.]+)/imgres\?imgurl=https?://}{gimg.$1:};
        s{images\.search\.yahoo\.com/search/images/}{yimg:};
        s{(?:\w+\.)?google\.[a-z]+/}{google:};
        s{search\.yahoo\.com/}{ysearch:};
        s{search\.msn\.[a-z]+/results\.aspx}{msn:};
        s{google\.yahoo\.com}{ygoogle}g;
        s{www\.dogpile\.com/info\.dogpl/}{dogpile:};

        if (m{google:search}) {
            my $query = _query_param($_, qr{[?&_](q=[^?&]*)});
            s{google:search.+}{google:$query};
        }
        if (m{ysearch:search}) {
            my $query = _query_param($_, qr{[?&_](p=[^?&]*)});
            s{ysearch:search.+}{ysearch:$query};
        }
        if (m{msn:}) {
            my $query = _query_param($_, qr{[?&](q=[^?&]*)});
            s{msn:.+}{msn:$query};
        }
        if (m{dogpile:}) {
            s{search/web/}{}i;
        }
    }

    return $ref;
}

# _query_param($text, $re)
#
# Returns the first capture of $re in $text, or '' when $re does not match,
# so callers can interpolate the result without an "uninitialized" warning.
sub _query_param {
    my ($text, $re) = @_;
    my ($param) = $text =~ $re;
    return defined $param ? $param : '';
}

# csv_quote($field)
#
# Returns $field wrapped in double quotes, with embedded double quotes
# doubled so the field stays a single CSV column.
sub csv_quote {
    my ($field) = @_;
    $field =~ s/"/""/g;
    return qq{"$field"};
}

# hostlookup($ip)
#
# Returns the reverse-DNS name for the dotted-quad $ip, or "[$ip]" when the
# address does not resolve or is not four octets in 0..255.  Results are
# cached in %hname, so each address is looked up at most once per run.
sub hostlookup {
    my ($ip) = @_;

    return $hname{$ip} if exists $hname{$ip};

    my @octets = split /\./, $ip;
    my @bad    = grep { !/^\d+$/ || $_ > 255 } @octets;

    my $name;
    if (@octets == 4 && !@bad) {
        $name = gethostbyaddr(pack('C4', @octets), AF_INET);
    }
    $name = "[$ip]" unless $name;

    $hname{$ip} = $name;
    warn "*** resolved $ip -> $name\n" if $debug;

    return $name;
}