| 1 | #!/usr/bin/perl -w |
| 2 | |
# OSM tile cleanup / maintenance script. Big parts are copied from
# hochwasserloeschung.pl.
| 5 | |
| 6 | use Fcntl ':mode'; |
| 7 | use POSIX 'strftime'; |
| 8 | |
# Defaults for the script's settings. The command line parser further down
# in this script may override them.
($zoommin, $zoommax) = (0, 20);   # zoom level range to handle (inclusive)
$filelimit   = -1;                # only values > 0 limit the number of files handled
$rmemptydirs = 0;                 # 1 = remove empty subdirectories
# Action codes: 0 print, 1 delete, 2 rerender, 3 rerenderexpiredlist,
# 4 touchexpiredlist.
$action      = 0;
$rrs         = '/bin/echo';       # command invoked as "$rrs z x y" for render requests
$minage      = 0;                 # In hours!
$setbackinterval = 86400 * 7305;  # 20 years (including leap years), in seconds
| 17 | |
# Comparator for sort(): orders candidate entries (array references) in
# ascending numeric order of the value stored at index 1, which is the mtime
# in the entries built by remembercandidate().
sub sortbydateasc {
    my ($left, $right) = ($a->[1], $b->[1]);
    return $left <=> $right;
}
| 23 | |
# Records a file as a candidate for later processing by appending it to the
# global @allentries list and counting it in $totalfilesseen.
# Parameters: 0 filename
#             1 atime (accepted but not stored)
#             2 mtime
#             3 type field from stat (containing DIR, LINK etc.)
#             4 uid
#             5 gid
#             6 size in bytes
# The stored entry is [ filename, mtime, type, uid, gid, size ].
sub remembercandidate($$$$$$$) {
    my ($fname, $atime, $mtime, $mode, $uid, $gid, $size) = @_;
    # We do not touch root's files or directories (uid 0 or gid 0).
    return if (($uid == 0) || ($gid == 0));
    # Anything modified within the last $minage hours is too new to consider.
    my $newestallowed = time() - ($minage * 3600);
    return if ($mtime > $newestallowed);
    $totalfilesseen++;
    push(@allentries, [ $fname, $mtime, $mode, $uid, $gid, $size ]);
}
| 45 | |
# Walks the tile directory tree and hands every metatile file found on the
# deepest level to remembercandidate().
# Par. 0: Path where we start
# Par. 1: Recursion counter (0 for the top directory holding the zoom dirs)
# Returns: Number of files/dirs found in this directory (non-recursively!).
#          Every entry except '.' and '..' is counted, including ones that
#          are skipped afterwards.
# Levels 0-4 may only contain all-numeric directories, level 5 only regular
# files named <number>.meta; everything else, and every symlink, is ignored.
# On level 0 only zoom directories within $zoommin..$zoommax are entered.
# If $rmemptydirs is set, subdirectories found empty on levels 1-4 are
# removed. Exits the program if a directory cannot be opened.
sub runrecursive($$);
sub runrecursive($$) {
    my ($pth, $recctr) = @_;
    my $direntries = 0;
    my $DIR;
    unless (opendir($DIR, $pth)) {
        print("ERROR: Failed to open dir $pth: $!\n");
        exit(1);
    }
    # All per-entry state is lexical, so nested calls cannot clobber the
    # state of the directory currently being processed.
    while (defined(my $name = readdir($DIR))) {
        next if (($name eq '.') || ($name eq '..'));
        $direntries++;
        my $fullpath = $pth . '/' . $name;
        my @statres = lstat($fullpath);
        # lstat returns an empty list on failure (e.g. no permission).
        next unless (@statres > 12);
        next if (S_ISLNK($statres[2])); # A symlink? We no like.
        if ($recctr <= 4) { # There should be no files on these levels
            next unless (S_ISDIR($statres[2]));
            next unless ($name =~ m/^\d+$/);
        } else { # whereas there should be only files called .meta on level 5.
            next unless (S_ISREG($statres[2]));
            next unless ($name =~ m/^\d+\.meta$/);
        }
        if ($recctr == 0) {
            # Top level: the directory name is the zoom level.
            my $z = int($name);
            next unless (($z >= $zoommin) && ($z <= $zoommax));
            runrecursive($fullpath, $recctr + 1);
        } elsif ($recctr <= 4) {
            if (runrecursive($fullpath, $recctr + 1) == 0) { # Empty dir!
                if ($rmemptydirs) {
                    print("RMDIR: $fullpath\n");
                    unless (rmdir($fullpath)) {
                        print("ERROR: rmdir for $fullpath failed: $!\n");
                    }
                }
            }
        } else {
            remembercandidate($fullpath, $statres[8], $statres[9], $statres[2],
                              $statres[4], $statres[5], $statres[12]);
        }
    }
    closedir($DIR);
    return $direntries;
}
| 100 | |
# Validates a metatile file and, if it is consistent, asks the rerender
# script ($rrs) to regenerate it.
# Par. 0: Full path of file; must end in z/a/b/c/d/e.meta (all numeric)
# The x/y coordinates are reconstructed from the five path components (high
# nibble of each component belongs to x, low nibble to y) and compared with
# the x, y and z stored in the file header. On any mismatch or I/O problem a
# warning is printed and the file is skipped.
sub rerenderfile($) {
    my $fpth = $_[0];
    # We first need to figure out the relevant numbers from the full file path
    unless ($fpth =~ m!/(\d+)/(\d+)/(\d+)/(\d+)/(\d+)/(\d+)\.meta$!) {
        print("WARNING: rerenderfile: Failed to extract path components for rerendering out of '$fpth' - skipping\n");
        return;
    }
    my ($zl, @p) = ($1, $2, $3, $4, $5, $6);
    my $calcx = 0;
    my $calcy = 0;
    foreach my $comp (@p) {
        $calcx = ($calcx << 4) | (($comp & 0xf0) >> 4);
        $calcy = ($calcy << 4) | ($comp & 0x0f);
    }
    # struct meta_layout {
    #   char magic[4];   // 'M' 'E' 'T' 'A'
    #   int count;       // METATILE ^ 2
    #   int x, y, z;     // lowest x,y of this metatile, plus z
    my $fi;
    unless (open($fi, '<', $fpth)) {
        print("WARNING: rerenderfile: Failed to open file '$fpth' - skipping\n");
        return;
    }
    binmode($fi);
    my $dbuf;
    my $got = read($fi, $dbuf, 20);
    close($fi);
    # A truncated header must not be parsed; we need all 20 bytes.
    unless (defined($got) && ($got == 20)) {
        print("WARNING: rerenderfile: Failed to read info from metatile '$fpth' - skipping\n");
        return;
    }
    # The integers are decoded as 32 bit little endian values.
    my ($magic, $count, $fx, $fy, $fz) = unpack('a4 V4', $dbuf);
    unless ($magic eq 'META') {
        print("WARNING: rerenderfile: file '$fpth' is not a metatile - skipping\n");
        return;
    }
    # The whole 32 bit count is compared, not just its lowest byte.
    unless ($count == 64) {
        print("WARNING: rerenderfile: file '$fpth' is not a 8x8 metatile - skipping\n");
        return;
    }
    unless ($fx == $calcx) {
        print("WARNING: rerenderfile: file '$fpth' is invalid - xsize $fx != $calcx - skipping\n");
        return;
    }
    unless ($fy == $calcy) {
        print("WARNING: rerenderfile: file '$fpth' is invalid - ysize $fy != $calcy - skipping\n");
        return;
    }
    unless ($fz == $zl) {
        print("WARNING: rerenderfile: file '$fpth' is invalid - zsize $fz != $zl - skipping\n");
        return;
    }
    print("Sending rendering request for z=$zl x=$calcx y=$calcy to regenerate '$fpth'\n");
    # $rrs is passed through the shell on purpose so that it may carry its
    # own arguments; the three appended values are purely numeric.
    if (system("$rrs $zl $calcx $calcy")) {
        print("Error executing $rrs $zl $calcx $calcy\n");
    }
}
| 163 | |
# Builds the five-component relative directory path for a tile coordinate.
# Par. 0: x
# Par. 1: y
# Returns: "a/b/c/d/e", where each component combines four bits of x (high
#          nibble) with four bits of y (low nibble); the lowest bits of x and
#          y end up in the last component.
sub calcpathfromcomponents($$) {
    my ($x, $y) = @_;
    my @parts;
    foreach (1 .. 5) {
        # Consume the lowest nibbles first and prepend, so that the most
        # significant nibbles come first in the final path.
        unshift(@parts, sprintf("%d", (($x & 0x0f) << 4) + ($y & 0x0f)));
        $x >>= 4;
        $y >>= 4;
    }
    return join('/', @parts);
}
| 176 | |
# Reads a list of expired tiles from STDIN, one "z/x/y" per line, and
# re-renders ($action == 3) or touches ($action == 4) the matching metatiles
# below $fspath. Lines in any other format are ignored silently.
# Only tiles given at zoom level $zoommax - 3 are accepted. Each of them is
# projected onto every zoom level from $zoommin to $zoommax and rounded down
# to the metatile origin (a multiple of 8 in x and y). The collected
# metatiles are stored in the global %rerenderlist and handled in ascending
# numeric order.
# "Touching" sets the mtime back by $setbackinterval seconds, unless the file
# is already older than that.
# Exits the program if $action is neither 3 nor 4.
sub dohandleexpiredlist() {
    %rerenderlist = ();
    while (my $ll = <STDIN>) {
        next unless ($ll =~ m!^(\d+)/(\d+)/(\d+)$!);
        my ($z, $x, $y) = ($1, $2, $3);
        if ($z != ($zoommax - 3)) {
            print("Ignoring z=$z x=$x y=$y because of wrong zoom\n");
            next;
        }
        for (my $curz = $zoommin; $curz <= $zoommax; $curz++) {
            my ($cz, $cx, $cy) = ($z, $x, $y);
            while ($cz < $curz) {
                $cz++; $cx <<= 1; $cy <<= 1;
            }
            while ($cz > $curz) {
                $cz--; $cx >>= 1; $cy >>= 1;
            }
            # Round down to the metatile origin. This must not cut off high
            # bits: from zoom 17 on, coordinates no longer fit into 16 bits.
            $cx -= $cx % 8;
            $cy -= $cy % 8;
            $rerenderlist{$cz}{$cx}{$cy} = 1;
        }
    }
    my $filesdone = 0; my $filesseen = 0; my $filesalreadytouched = 0;
    foreach my $z (sort { $a <=> $b } keys(%rerenderlist)) {
        foreach my $x (sort { $a <=> $b } keys(%{$rerenderlist{$z}})) {
            foreach my $y (sort { $a <=> $b } keys(%{$rerenderlist{$z}{$x}})) {
                my $p = $fspath . '/' . $z . '/' . calcpathfromcomponents($x, $y) . '.meta';
                $filesseen++;
                next unless (-e $p);
                if ($action == 3) {
                    $filesdone++;
                    print("Sending rendering request for z=$z x=$x y=$y to regenerate '$p'\n");
                    if (system("$rrs $z $x $y")) {
                        print("Error executing $rrs $z $x $y\n");
                    }
                } elsif ($action == 4) {
                    my $mtime = (stat($p))[9];
                    unless (defined($mtime)) {
                        # The file vanished or became unreadable after the -e test.
                        print("Error touching '$p': $!\n");
                        next;
                    }
                    my $curtime = time();
                    if (($curtime - $setbackinterval) > $mtime) {
                        # Do not touch again - it's already 20 years back.
                        $filesalreadytouched++;
                    } else {
                        $filesdone++;
                        my $newmtime = $mtime - $setbackinterval;
                        print("Touching '$p' (z=$z x=$x y=$y)\n");
                        # atime is set to now, only the mtime is moved back.
                        if (utime($curtime, $newmtime, $p) < 1) {
                            print("Error touching '$p': $!\n");
                        }
                    }
                } else {
                    print("Internal error - action variable invalid. This is a programming error.\n");
                    exit(1);
                }
            }
        }
    }
    if ($action == 3) {
        print("Sent re-rendering-requests for $filesdone files that actually existed (of $filesseen candidates)\n");
    } elsif ($action == 4) {
        print("Touched $filesdone files ($filesseen candidates, $filesalreadytouched already touched)\n");
    }
}
| 247 | |
# ---- Command line handling and main program ----
# The settings assigned here ($fspath, $action, $zoommin, ...) are package
# globals because the subroutines of this script read them directly.
$fspath = '';

# Action names accepted by --action and their numeric codes.
my %actioncodes = (
    'print'               => 0,
    'delete'              => 1,
    'rerender'            => 2,
    'rerenderexpiredlist' => 3,
    'touchexpiredlist'    => 4,
);

my $i;
# Returns the value following the option at $ARGV[$i] and advances $i past
# it. Exits with an error if the command line ends before the value.
my $optvalue = sub {
    my ($optname) = @_;
    $i++;
    if ($i >= @ARGV) {
        print("Error: $optname requires a parameter\n");
        exit(1);
    }
    return $ARGV[$i];
};

for ($i = 0; $i < @ARGV; $i++) {
    my $curarg = $ARGV[$i];
    if ($curarg eq '--help') {
        print("Syntax: $0 [--help] [--zoom z] [--limit n] [--action a] [--rmemptydirs]\n");
        print("        [--rrs path] [--minage hrs] directory\n");
        print(" --zoom z[-z2]: Zoom level(s) to handle (default: 0-20).\n");
        print(" --limit n: Limit to the n oldest files (default: no limit)\n");
        print(" --action what: what action to perform with the found files.\n");
        print("   valid actions are: print delete rerender rerenderexpiredlist\n");
        print("   touchexpiredlist (default: print)\n");
        print("   rerenderexpiredlist and touchexpiredlist: these are very different from the\n");
        print("   other commands, as they expect to read a list of expired tiles to rerender\n");
        print("   or touch on STDIN. The list has to be in the format that osm2pgsql spits\n");
        print("   out. touchexpiredlist sets the mtime of the tiles back 20 years.\n");
        print(" --rmemptydirs: remove empty subdirectories\n");
        print(" --rrs path: if action is rerender, this gives the path to the rerender script.\n");
        print("   it gets called with three parameters: z x y\n");
        print(" --minage hrs: only care about files that are at least hrs hours old\n");
        exit(1);
    } elsif ($curarg eq '--zoom') {
        my $val = $optvalue->('--zoom');
        if ($val =~ m/^(\d+)-(\d+)$/) {
            $zoommin = $1; $zoommax = $2;
        } elsif ($val =~ m/^(\d+)$/) {
            $zoommin = $1; $zoommax = $1;
        } else {
            print("Error: --zoom needs a single numeric parameter or a range (e.g. 4 or 10-12)\n");
            exit(1);
        }
    } elsif ($curarg eq '--limit') {
        my $val = $optvalue->('--limit');
        unless ($val =~ m/^(\d+)$/) {
            print("Error: --limit requires an positive integer as parameter.\n");
            # Do not silently continue without the limit the user asked for.
            exit(1);
        }
        $filelimit = $1;
    } elsif ($curarg eq '--action') {
        my $val = $optvalue->('--action');
        unless (exists($actioncodes{$val})) {
            print("Error: Invalid action selected.\n");
            exit(1);
        }
        $action = $actioncodes{$val};
    } elsif ($curarg eq '--rrs') {
        $rrs = $optvalue->('--rrs');
    } elsif ($curarg eq '--minage') {
        my $val = $optvalue->('--minage');
        unless ($val =~ m/^(\d+)$/) {
            print("Error: --minage requires an positive integer as parameter.\n");
            # Continuing with minage 0 could delete files the user wanted kept.
            exit(1);
        }
        $minage = $1;
    } elsif ($curarg eq '--rmemptydirs') {
        $rmemptydirs = 1;
    } else {
        # Anything that is not a known option is taken as the directory.
        if ($fspath eq '') {
            $fspath = $curarg;
        } else {
            print("Too many parameters, or parameter(s) not understood.\n");
            print("I can only handle one directory parameter, but I consider both ");
            print("'$curarg' and '$fspath' a pathname since they are not a known parameter.\n");
            exit(1);
        }
    }
}
if ($fspath eq '') {
    print("ERROR: No path to clear given.\n");
    exit(1);
}
if (($action == 3) || ($action == 4)) { # This significantly differs from the rest of our operations
    dohandleexpiredlist();
    exit(0);
}

# Collect all candidates, then handle them oldest first.
@allentries = ();
$totalfilesseen = 0;
runrecursive($fspath, 0);
@allentries = sort(sortbydateasc @allentries);
print(scalar(@allentries) . " files seen\n");
my $filesdone = 0;
foreach my $ent (@allentries) {
    # Only a positive $filelimit restricts the number of files handled.
    if (($filelimit > 0) && ($filesdone >= $filelimit)) {
        print("File limit $filelimit reached, exiting.\n");
        exit(0);
    }
    if ($action == 0) { # Print
        print("$ent->[0]\n");
    } elsif ($action == 1) { # Delete
        print("DELETING $ent->[0] (mtime " . strftime("%Y-%m-%d.%H:%M:%S", localtime($ent->[1])) . ")\n");
        unless (unlink($ent->[0])) {
            print("ERROR: rm for $ent->[0] failed: $!\n");
        }
    } elsif ($action == 2) { # Rerender
        rerenderfile($ent->[0]);
    }
    $filesdone++;
}