#!/usr/bin/perl -w
#
# collectlogsintodb.pl - script for parsing apache and tirex logs into a db.
#
# Provenance of this file (header of the commit it was recovered from):
#   From:      Michael Meier
#   Date:      Thu, 10 Sep 2015 12:46:51 +0000 (+0200)
#   Subject:   script for parsing apache and tirex logs into a db.
#   X-Git-Url: http://git.rrze.uni-erlangen.de/gitweb/?p=osmrrze.git;a=commitdiff_plain;h=f9d757fc71a72034d23086dced3227ae83d51bec
#   Path:      scripts/collectlogsintodb.pl (new file, mode 100755,
#              index 0000000..8092c7e, hunk @@ -0,0 +1,231 @@)
#
# Usage: collectlogsintodb.pl --apache|--tirex [logfile]
# Reads the logfile (or STDIN when no filename / '-' is given) and either
#   --apache: counts successful tile requests per metatile and adds the
#             counts to table 'tilerequests', or
#   --tirex:  stores one row per successful render request in table
#             'renderrequests'.

use strict;
use warnings;

# Where is the ftp stats database?
our $statsdb = 'osmstats';

# How many tiles are a metatile? These get merged / counted as one tile.
# If you really want to get stats for individual tiles and not per metatile,
# just set this to 1.
our $metatilesize = 8;

# Maps the style name found in the apache request path to a numeric style id.
our %stylemap = ( 'tiles'   => 1,
                  'osmde'   => 2,
                  'lowzoom' => 3,
                  'osmhd'   => 4
                );

# Maps the tirex 'map=' name to a numeric map id.
our %mapmap = ( 'osm'   => 1,
                'osmde' => 2,
                'osmhd' => 4
              );

# -----------------------------------------------------------------------------

use DBI;
use POSIX qw(strftime mktime);

# Database handle shared by all subs below; set in the main section.
my $dbh;

# Parameter 0: table name
# Returns: 1 if the table exists, or 0 if not.
sub dbtableexists {
    my ($tablename) = @_;
    my ($cnt) = $dbh->selectrow_array(
        'SELECT COUNT(*) FROM pg_tables WHERE tablename=?',
        undef, $tablename);
    return (defined($cnt) && ($cnt > 0)) ? 1 : 0;
}

# Initializes database if it doesn't exist yet
#
# NOTE(review): the original CREATE TABLE statements were lost when this file
# was extracted. The column NAMES below are taken from the INSERT/UPDATE
# statements in this script; the column TYPES, the primary key and the
# handling of a failed CREATE are reconstructions - confirm against the
# original commit before running this against a fresh database.
sub doinitdb {
    unless (dbtableexists('tilerequests')) {
        # The apache mode falls back to UPDATE when the INSERT fails, which
        # presumably relies on a uniqueness constraint over these key columns.
        my $res = $dbh->do(<<'EOSQL');
CREATE TABLE tilerequests (
    styleid  INTEGER NOT NULL,
    date     DATE    NOT NULL,
    z        INTEGER NOT NULL,
    y        INTEGER NOT NULL,
    x        INTEGER NOT NULL,
    requests BIGINT  NOT NULL,
    PRIMARY KEY (styleid, date, z, y, x)
)
EOSQL
        unless (defined($res)) {
            print(STDERR "Failed to create table tilerequests: $DBI::errstr\n");
            exit(1);
        }
    }
    unless (dbtableexists('renderrequests')) {
        # ts receives the raw 'request_time' value from the tirex log and
        # rendertime receives render_time / 1000.0.
        my $res = $dbh->do(<<'EOSQL');
CREATE TABLE renderrequests (
    mapid      INTEGER          NOT NULL,
    ts         BIGINT           NOT NULL,
    z          INTEGER          NOT NULL,
    y          INTEGER          NOT NULL,
    x          INTEGER          NOT NULL,
    rendertime DOUBLE PRECISION NOT NULL
)
EOSQL
        unless (defined($res)) {
            print(STDERR "Failed to create table renderrequests: $DBI::errstr\n");
            exit(1);
        }
    }
    return;
}

# Prints a short usage summary to STDOUT.
#
# NOTE(review): the original help text was lost in extraction; this text is
# rebuilt from the options the argument parser below accepts.
sub showhelp {
    print("Syntax: $0 [--help|-h] --apache|--tirex [logfile]\n");
    print("  --apache   logfile is an apache access log (counts tile requests)\n");
    print("  --tirex    logfile is a tirex job log (stores render requests)\n");
    print("  --help|-h  show this help\n");
    print("If no logfile (or '-') is given, the log is read from STDIN.\n");
    return;
}

# Rounds a tile coordinate down to the start of its metatile.
# Equivalent to masking with ~($metatilesize - 1) when $metatilesize is a
# power of two, but also correct for any other positive size.
sub _metatile_origin {
    my ($coord) = @_;
    return $coord - ($coord % $metatilesize);
}

# Parses one apache access log line.
# Parameter 0: the raw log line
# Returns: (styleid, z, x, y) with x/y aligned to the metatile for a
#          successful GET of a known style's .png tile, or an empty list
#          when the line is not relevant.
sub _parse_apache_line {
    my ($ll) = @_;
    $ll =~ s/[\r\n]//g;
    # 127.0.0.0 - - [23/Sep/2013:17:25:23 +0200] "GET /server-status?auto HTTP/1.1" 200 2431 "-" "libwww-perl/6.03"
    # If there is a " in the URL or referrer or user agent, it is escaped by
    # apache with '\"' - that makes it pretty hard to parse with a regexp.
    # Therefore, work around the problem by replacing the \" with something else.
    $ll =~ s/\\"/\%22/g; # HTML code for the "
    unless ($ll =~ m/^([^ ]+) ([^ ]+) ([^ ]+) \[([^\]]+)\] "([^"]+)" ([^ ]+) ([^ ]+)/) {
        return;
    }
    my $filename   = $5;
    my $statuscode = $6;
    return if ($filename !~ m/^GET /);
    # Reduce the request line to the bare path: drop the method, an absolute
    # URL prefix, the protocol suffix and the query string.
    $filename =~ s/^GET //g;
    $filename =~ s!(http|https)://[^/]*!!g;
    $filename =~ s! HTTP/\d\.\d$!!g;
    $filename =~ s!\?[^ ]*$!!g;
    # only count successful requests. 304 is 'not modified', so a successful request.
    if (($statuscode !~ m/^2../) && ($statuscode !~ m/^304/)) {
        return;
    }
    unless ($filename =~ m!([^/]+)/(\d+)/(\d+)/(\d+)\.png!) {
        return;
    }
    my ($sn, $z, $x, $y) = ($1, $2, $3, $4);
    my $s = $stylemap{$sn};
    return unless (defined($s));
    return ($s, $z, _metatile_origin($x), _metatile_origin($y));
}

# Apache mode: counts relevant requests per (style, z, y, x) metatile in
# memory, then writes each count to 'tilerequests' dated yesterday (local
# time). Exits with status 1 if a row can neither be inserted nor updated.
# Parameter 0: filehandle to read the log from (closed when done)
sub process_apache_log {
    my ($fh) = @_;
    my %ctr = ();
    my $nlines = 0;
    while (my $ll = <$fh>) {
        my @tile = _parse_apache_line($ll);
        next unless (@tile);
        my ($s, $z, $x, $y) = @tile;
        $ctr{$s}{$z}{$y}{$x}++;
        $nlines++;
    }
    close($fh);

    # Computed once so every row of this run lands on the same date, even if
    # the run crosses midnight.
    my $date = strftime("%Y-%m-%d", localtime(time() - 86400));
    my $ins = $dbh->prepare('INSERT INTO tilerequests(styleid, date, z, y, x, requests)' .
                            ' VALUES(?, ?, ?, ?, ?, ?)');
    my $upd = $dbh->prepare('UPDATE tilerequests SET requests=requests+?' .
                            ' WHERE styleid=? AND date=? AND z=? AND y=? AND x=?');
    unless ($ins && $upd) {
        print(STDERR "Failed to prepare DB statements: $DBI::errstr\n");
        exit(1);
    }
    # We're fully aware that the execute can fail, no need to spam about it.
    # Statement handles copy these attributes when they are created, so they
    # have to be switched off on the handles themselves.
    foreach my $sth ($ins, $upd) {
        $sth->{'PrintError'} = 0;
        $sth->{'PrintWarn'} = 0;
    }

    my $ninserts = 0;
    my $nupdates = 0;
    foreach my $s (keys(%ctr)) {
        foreach my $z (keys(%{$ctr{$s}})) {
            foreach my $y (keys(%{$ctr{$s}{$z}})) {
                my $row = $ctr{$s}{$z}{$y};
                foreach my $x (keys(%{$row})) {
                    my $requests = $row->{$x};
                    if ($ins->execute($s, $date, $z, $y, $x, $requests)) {
                        $ninserts++;
                        next;
                    }
                    # Try again with update
                    unless ($upd->execute($requests, $s, $date, $z, $y, $x)) {
                        print(STDERR "Both INSERT and UPDATE to DB failed: $DBI::errstr\n");
                        exit(1);
                    }
                    $nupdates++;
                }
            }
        }
    }
    print("Done. $nlines relevant lines of logfile were handled with $ninserts DB inserts and $nupdates DB updates.\n");
    return;
}

# Parses one tirex log line.
# Parameter 0: the raw log line
# Returns: (mapid, request_time, z, y, x, rendertime) for a successful
#          request of a known map, or an empty list otherwise. rendertime is
#          the logged render_time divided by 1000.
sub _parse_tirex_line {
    my ($ll) = @_;
    $ll =~ s/[\r\n]//g;
    # 2015-09-07T13:37:58 id=1441625875_43436056 map=osmde x=34816 y=22928 z=16 prio=20 request_time=1441625875 expire= sources=MMMMMMMM render_time=3277 success=1
    return unless ($ll =~ m/\ssuccess=1/); # We do not care about failed requests.
    return unless ($ll =~ m/\srequest_time=(\d+)\s/);
    my $reqtime = $1;
    return unless ($ll =~ m/\smap=([^ ]+)\s/);
    my $map = $mapmap{$1};
    return unless (defined($map));
    return unless ($ll =~ m/\srender_time=(\d+)\s/);
    my $rendertime = ($1 / 1000.0);
    return unless ($ll =~ m/\sx=(\d+)\s/);
    my $x = $1;
    return unless ($ll =~ m/\sy=(\d+)\s/);
    my $y = $1;
    return unless ($ll =~ m/\sz=(\d+)\s/);
    my $z = $1;
    return ($map, $reqtime, $z, $y, $x, $rendertime);
}

# Tirex mode: inserts one row into 'renderrequests' per successful render
# request. A failed insert is reported on STDERR and skipped; only rows that
# were actually inserted are counted in the final summary.
# Parameter 0: filehandle to read the log from (closed when done)
sub process_tirex_log {
    my ($fh) = @_;
    my $ins = $dbh->prepare('INSERT INTO renderrequests(mapid, ts, z, y, x, rendertime)' .
                            ' VALUES(?, ?, ?, ?, ?, ?)');
    unless ($ins) {
        print(STDERR "Failed to prepare DB statement: $DBI::errstr\n");
        exit(1);
    }
    my $nreqs = 0;
    while (my $ll = <$fh>) {
        my @req = _parse_tirex_line($ll);
        next unless (@req);
        if ($ins->execute(@req)) {
            $nreqs++;
        } else {
            print(STDERR "Failed to insert renderrequest into DB: $DBI::errstr\n");
        }
    }
    close($fh);
    print("Done. Inserted $nreqs entries into DB.\n");
    return;
}

# -----------------------------------------------------------------------------
# Main
# -----------------------------------------------------------------------------

# Parse the command line first, so that --help and usage errors work even
# when the database is not reachable.
my $FILENAME = '-';
my $RUNMODE = 0; # 1 = apache, 2 = tirex
foreach my $arg (@ARGV) {
    if (($arg eq '--help') || ($arg eq '-h')) {
        showhelp(); exit(0);
    } elsif ($arg eq '--tirex') {
        $RUNMODE = 2;
    } elsif ($arg eq '--apache') {
        $RUNMODE = 1;
    } else {
        unless ($FILENAME eq '-') {
            print("ERROR: At most one filename can be given on the command line.\n");
            showhelp(); exit(1);
        }
        $FILENAME = $arg;
    }
}
unless (($RUNMODE >= 1) && ($RUNMODE <= 2)) {
    print("ERROR: You must select a logfile mode (--apache or --tirex)\n");
    showhelp(); exit(1);
}

$dbh = DBI->connect("dbi:Pg:dbname=$statsdb", "", "");
unless ($dbh) {
    print(STDERR "Failed to open database. Please try again later.\n");
    exit(1);
}
# Create database tables if they do not exist yet
doinitdb();

my $ALF;
if ($FILENAME eq '-') {
    $ALF = \*STDIN;
} else {
    unless (open($ALF, '<', $FILENAME)) {
        print(STDERR "ERROR: Could not open logfile $FILENAME\n");
        exit(1);
    }
}

if ($RUNMODE == 1) { # Apache
    process_apache_log($ALF);
} else { # tirex logfile
    process_tirex_log($ALF);
}