#!/usr/bin/perl -w

# OSM tile cleanup / maintenance script. Big parts are a copied from
# hochwasserloeschung.pl.

use Fcntl ':mode';
use POSIX 'strftime';

$zoommin = 0;
$zoommax = 20;
$filelimit = -1;
$rmemptydirs = 0;
$action = 0;
$rrs = '/bin/echo';
$minage = 0;  # In hours!
$setbackinterval = 7305 * 86400; # 20 years (including leap years)

# Sort function for candidate list
sub sortbydateasc {
  #print("$a->[1] $b->[1]\n");
  return ($a->[1] <=> $b->[1]);
}

# Remembers a candidate for deletion, if required sorting and purging out the
# list.
# Parameters: 0   filename
#             1   atime
#             2   mtime
#             3   type field from stat (containing DIR, LINK etc.)
#             4   uid
#             5   gid
#             6   size in bytes
sub remembercandidate($$$$$$$) {
  if (($_[4] == 0) || ($_[5] == 0)) {
    # we do not touch roots files or directories.
    return;
  }
  if ($_[2] > (time() - ($minage * 3600))) {
    #print("not adding $_[0], too new\n");
    return;
  }
  $totalfilesseen++;
  push(@allentries, [ $_[0], $_[2], $_[3], $_[4], $_[5], $_[6] ]);
}

# Par. 0: Path were we start
# Par. 1: Recursion counter
# Returns: Number of files/dirs found (non-recursively!)
sub runrecursive($$);
sub runrecursive($$) {
  my $DIR;
  my $pth = $_[0];
  my $curdirentry;
  my @statres;
  my $recctr = $_[1];
  my $direntries = 0;
  unless (opendir($DIR, $pth)) {
    print("ERROR: Failed to open dir $pth\n");
    exit(1);
  }
  while ($curdirentrys = readdir($DIR)) {
    if (($curdirentrys eq '.') || ($curdirentrys eq '..')) {
      next;
    }
    $direntries++;
    $curdirentryf = $pth.'/'.$curdirentrys;
    @statres = lstat($curdirentryf);
    unless (@statres > 12) {
      # This usually happens if we have a symlink to a file for which we have
      # no permission to read.
      #print("stat failed for $curdirentry\n");
      next;
    }
    if (S_ISLNK($statres[2])) { next; } # A symlink? We no like.
    if ($recctr <= 4) { # There should be no files on these levels
      unless (S_ISDIR($statres[2])) { next; }
      unless ($curdirentrys =~ m/^\d+$/) { next; }
    } else { # whereas there should be only files called .meta on level 5.
      unless (S_ISREG($statres[2])) { next; }
      unless ($curdirentrys =~ m/^\d+\.meta$/) { next; }
    }
    if ($recctr == 0) {
      my $z = int($curdirentrys);
      unless (($z >= $zoommin) && ($z <= $zoommax)) { next; }
      runrecursive($curdirentryf, $recctr+1);
    } elsif ($recctr <= 4) {
      if (runrecursive($curdirentryf, $recctr+1) == 0) { # Empty dir!
        if ($rmemptydirs) {
          print("RMDIR: $curdirentryf\n");
          rmdir($curdirentryf);
        }
      }
    } else {
      remembercandidate($curdirentryf, $statres[8], $statres[9], $statres[2], $statres[4], $statres[5], $statres[12]);
    }
  }
  closedir($DIR);
  return $direntries;
}

# Par. 0: Full path of file
sub rerenderfile($) {
  my $fpth = $_[0];
  # We first need to figure out the relevant numbers from the full file path
  unless ($fpth =~ m!/(\d+)/(\d+)/(\d+)/(\d+)/(\d+)/(\d+)\.meta$!) {
    print("WARNING: rerenderfile: Failed to extract path components for rerendering out of '$fpth' - skipping\n");
    return;
  }
  my $zl = $1; my @p = ( $2, $3, $4, $5, $6 );
  my $calcx = 0; my $calcy = 0;
  my $i;
  for ($i = 0; $i < 5; $i++) {
    $calcx = ($calcx << 4) | (($p[$i] & 0xf0) >> 4);
    $calcy = ($calcy << 4) | (($p[$i] & 0x0f) >> 0);
  }
  #  struct meta_layout {
  #    char magic[4];        // 'M' 'E' 'T' 'A'
  #    int count;            // METATILE ^ 2
  #    int x, y, z;          // lowest x,y of this metatile, plus z
  my $fi;
  unless (open($fi, '<', $fpth)) {
    print("WARNING: rerenderfile: Failed to open file '$fpth' - skipping\n");
    return;
  }
  binmode($fi);
  my $dbuf;
  unless (read($fi, $dbuf, 20)) {
    print("WARNING: rerenderfile: Failed to read info from metatile '$fpth' - skipping\n");
    return;
  }
  close($fi); undef($fi);
  unless (substr($dbuf, 0, 4) eq 'META') {
    print("WARNING: rerenderfile: file '$fpth' is not a metatile - skipping\n");
    return;
  }
  unless (ord(substr($dbuf, 4, 1)) == 64) {
    print("WARNING: rerenderfile: file '$fpth' is not a 8x8 metatile - skipping\n");
    return;
  }
  my $fx = (ord(substr($dbuf,  8, 1)) <<  0) | (ord(substr($dbuf,  9, 1)) <<  8)
         | (ord(substr($dbuf, 10, 1)) << 16) | (ord(substr($dbuf, 11, 1)) << 24);
  unless ($fx == $calcx) {
    print("WARNING: rerenderfile: file '$fpth' is invalid - xsize $fx != $calcx - skipping\n");
    return;
  }
  my $fy = (ord(substr($dbuf, 12, 1)) <<  0) | (ord(substr($dbuf, 13, 1)) <<  8)
         | (ord(substr($dbuf, 14, 1)) << 16) | (ord(substr($dbuf, 15, 1)) << 24);
  unless ($fy == $calcy) {
    print("WARNING: rerenderfile: file '$fpth' is invalid - xsize $fy != $calcy - skipping\n");
    return;
  }
  my $fz = (ord(substr($dbuf, 16, 1)) <<  0) | (ord(substr($dbuf, 17, 1)) <<  8)
         | (ord(substr($dbuf, 18, 1)) << 16) | (ord(substr($dbuf, 19, 1)) << 24);
  unless ($fz == $zl) {
    print("WARNING: rerenderfile: file '$fpth' is invalid - zsize $fz != $zl - skipping\n");
    return;
  }
  print("Sending rendering request for z=$zl x=$calcx y=$calcy to regenerate '$fpth'\n");
  if (system("$rrs $zl $calcx $calcy")) {
    print("Error executing $rrs $zl $calcx $calcy\n");
  }
}

# Par. 0: x
# Par. 1: y
sub calcpathfromcomponents($$) {
  my @res = ();
  my $i; my $x = $_[0]; my $y = $_[1];
  for ($i = 4; $i >= 0; $i--) {
    $res[$i] = sprintf("%d", (($x & 0x0f) << 4) + ($y & 0x0f));
    $x = $x >> 4;
    $y = $y >> 4;
  }
  return $res[0] . "/" . $res[1] . "/" . $res[2] . "/" . $res[3] . "/" . $res[4];
}

sub dohandleexpiredlist() {
  my $ll;
  %rerenderlist = ();
  while ($ll = <STDIN>) {
    if ($ll =~ m!^(\d+)/(\d+)/(\d+)$!) {
      my $x = $2; my $y = $3; my $z = $1;
      #print("Handling z=$z x=$x y=$y\n");
      if ($z != ($zoommax - 3)) {
        print("Ignoring z=$z x=$x y=$y because of wrong zoom\n");
        next;
      }
      my $cz; my $cx; my $cy; my $curz;
      for ($curz = $zoommin; $curz <= $zoommax; $curz++) {
        $cz = $z; $cx = $x; $cy = $y;
        while ($cz < $curz) {
          $cz++; $cx <<= 1; $cy <<= 1;
        }
        while ($cz > $curz) {
          $cz--; $cx >>= 1; $cy >>= 1;
        }
        #print("Matching tile at z=$cz: x=$cx y=$cy -");
        $cx = $cx & 0xfff8; $cy = $cy & 0xfff8;
        #print(" rounded to x=$cx y=$cy\n");
        $rerenderlist{$cz}{$cx}{$cy} = 1;
      }
    }
  }
  my $filesdone = 0; my $filesseen = 0; my $filesalreadytouched = 0;
  foreach $z (sort(keys(%rerenderlist))) {
    foreach $x (sort(keys(%{$rerenderlist{$z}}))) {
      foreach $y (sort(keys(%{$rerenderlist{$z}{$x}}))) {
        my $p = ${fspath} . '/' . $z . '/' . calcpathfromcomponents($x, $y) . '.meta';
        #print("Checking: $z $x $y - Path: $p\n");
        $filesseen++;
        if (-e $p) {
          if ($action == 3) {
            $filesdone++;
            print("Sending rendering request for z=$z x=$x y=$y to regenerate '$p'\n");
            if (system("$rrs $z $x $y")) {
              print("Error executing $rrs $z $x $y\n");
            }
          } elsif ($action == 4) {
            my $mtime = (stat($p))[9];
            my $curtime = time();
            if (($curtime - $setbackinterval) > $mtime) {
              # Do not touch again - it's already 20 years back.
              #print("Not touching '$p', it's over 20 years old so probably has already been set back.\n");
              $filesalreadytouched++;
            } else {
              $filesdone++;
              my $newmtime = $mtime - $setbackinterval;
              print("Touching '$p' (z=$z x=$x y=$y)\n");
              if (utime($curtime, $newmtime, $p) < 1) {
                print("Error touching '$p': $!\n");
              }
            }
          } else {
            print("Internal error - action variable invalid. This is a programming error.\n");
            exit(1);
          }
        }
      }
    }
  }
  if ($action == 3) {
    print("Sent re-rendering-requests for $filesdone files that actually existed (of $filesseen candidates)\n");
  } elsif ($action == 4) {
    print("Touched $filesdone files ($filesseen candidates, $filesalreadytouched already touched)\n");
  }
}

$fspath = '';
for ($i = 0; $i < @ARGV; $i++) {
  $curarg = $ARGV[$i];
  if      ($curarg eq '--help') {
    print("Syntax: $0 [--help] [--zoom z] [--limit n] [--action a] [--rmemptydirs]\n");
    print("           [--rrs path] [--minage hrs] directory\n");
    print("  --zoom z[-z2]: Zoom level(s) to handle (default: 0-20).\n");
    print("  --limit n: Limit to the n oldest files (default: no limit)\n");
    print("  --action what: what action to perform with the found files.\n");
    print("    valid actions are: print delete rerender rerenderexpiredlist\n");
    print("                       touchexpiredlist (default: print)\n");
    print("    rerenderexpiredlist and touchexpiredlist: these are very different from the\n");
    print("    other commands, as they expect to read a list of expired tiles to rerender\n");
    print("    or touch on STDIN. The list has to be in the format that osm2pgsql spits\n");
    print("    out. touchexpiredlist sets the mtime of the tiles back 20 years.\n");
    print("  --rmemptydirs: remove empty subdirectories\n");
    print("  --rrs path: if action is rerender, this gives the path to the rerender script.\n");
    print("    it gets called with three parameters: z x y\n");
    print("  --minage hrs: only care about files that are at least hrs hours old\n");
    exit(1);
  } elsif ($curarg eq '--zoom') {
    $i++;
    if ($i >= @ARGV) {
      print("Error: --zoom requires a parameter\n");
      exit(1);
    } else {
      if ($ARGV[$i] =~ m/^(\d+)-(\d+)$/) {
        $zoommin = $1; $zoommax = $2;
      } elsif ($ARGV[$i] =~ m/^(\d+)$/) {
        $zoommin = $1; $zoommax = $1;
      } else {
        print("Error: --zoom needs a single numeric parameter or a range (e.g. 4 or 10-12)\n");
        exit(1);
      }
    }
  } elsif ($curarg eq '--limit') {
    $i++;
    if ($i >= @ARGV) {
      print("Error: --limit requires a parameter\n");
      exit(1);
    } else {
      if ($ARGV[$i] =~ m/^(\d+)$/) {
        $filelimit = $1;
      } else {
        print("Error: --limit requires an positive integer as parameter.\n");
      }
    }
  } elsif ($curarg eq '--action') {
    $i++;
    if ($i >= @ARGV) {
      print("Error: --action requires a parameter\n");
      exit(1);
    } else {
      if      ($ARGV[$i] eq 'print') {
        $action = 0;
      } elsif ($ARGV[$i] eq 'delete') {
        $action = 1;
      } elsif ($ARGV[$i] eq 'rerender') {
        $action = 2;
      } elsif ($ARGV[$i] eq 'rerenderexpiredlist') {
        $action = 3;
      } elsif ($ARGV[$i] eq 'touchexpiredlist') {
        $action = 4;
      } else {
        print("Error: Invalid action selected.\n");
        exit(1);
      }
    }
  } elsif ($curarg eq '--rrs') {
    $i++;
    if ($i >= @ARGV) {
      print("Error: --rrs requires a parameter\n");
      exit(1);
    } else {
      $rrs = $ARGV[$i];
    }
  } elsif ($curarg eq '--minage') {
    $i++;
    if ($i >= @ARGV) {
      print("Error: --minage requires a parameter\n");
      exit(1);
    } else {
      if ($ARGV[$i] =~ m/^(\d+)$/) {
        $minage = $1;
      } else {
        print("Error: --minage requires an positive integer as parameter.\n");
      }
    }
  } elsif ($curarg eq '--rmemptydirs') {
    $rmemptydirs = 1;
  } else {
    if ($fspath eq '') {
      $fspath = $curarg;
    } else {
      print("Too many parameters, or parameter(s) not understood.\n");
      print("I can only handle one directory parameter, but I consider both ");
      print("'$curarg' and '$fspath' a pathname since they are not a known parameter.\n");
      exit(1);
    }
  }
}
if ($fspath eq '') {
  print("ERROR: No path to clear given.\n");
  exit(1);
}
if (($action == 3) || ($action == 4)) { # This significantly differs from the rest of our operations
  dohandleexpiredlist();
  exit(0);
}
@allentries = ();
$totalfilesseen = 0;
runrecursive($fspath, 0);
@allentries = sort(sortbydateasc @allentries);
print(int(@allentries)." files seen\n");
$filesdone = 0;
foreach $ent (@allentries) {
  if ($filelimit > 0) {
    if ($filesdone >= $filelimit) {
      print("File limit $filelimit reached, exiting.\n");
      exit(0);
    }
  }
  #print("Handling File '$ent->[0]', $ent->[5] blocks [u: $ent->[3]  g: $ent->[4] mtime: ".strftime("%Y-%m-%d.%H:%M:%S", localtime($ent->[1]))."]\n");
  if ($action == 0) { # Print
    print("$ent->[0]\n");
  } elsif ($action == 1) { # Delete
    print("DELETING $ent->[0] (mtime " . strftime("%Y-%m-%d.%H:%M:%S", localtime($ent->[1])) . ")\n");
    unless (unlink($ent->[0])) {
      print("ERROR: rm for $ent->[0] failed!\n");
    }
  } elsif ($action == 2) { # Rerender
    rerenderfile($ent->[0]);
  }
  $filesdone++;
}
