better stats for touching files
[osmrrze.git] / scripts / osmtilecleanup.pl
CommitLineData
4b713169 1#!/usr/bin/perl -w
2
# OSM tile cleanup / maintenance script. Big parts are copied from
# hochwasserloeschung.pl.
5
6use Fcntl ':mode';
7use POSIX 'strftime';
8
# Default settings. Most of them can be overridden on the command line.
($zoommin, $zoommax) = (0, 20);  # Range of zoom levels to handle (--zoom)
$filelimit = -1;                 # Stop after this many files; values <= 0 mean no limit (--limit)
$rmemptydirs = 0;                # If true, remove empty subdirectories (--rmemptydirs)
$action = 0;                     # 0 print, 1 delete, 2 rerender, 3 rerenderexpiredlist, 4 touchexpiredlist
$rrs = '/bin/echo';              # Command that gets called with "z x y" to request a rerender (--rrs)
$minage = 0;                     # In hours! (--minage)
$setbackinterval = 7305 * 86400; # 20 years (including leap years), in seconds
4b713169 17
# Comparator for sort(): orders candidate entries (array references as built
# by remembercandidate) ascending by their element 1, the mtime - so the
# oldest file comes first.
sub sortbydateasc {
  my $order = ($a->[1] <=> $b->[1]);
  return $order;
}
23
# Remembers a file as a candidate by appending it to the global list
# @allentries and counting it in $totalfilesseen. Files owned by root (uid or
# gid 0) and files whose mtime is less than $minage hours ago are skipped.
# Parameters: 0 filename
#             1 atime (not used at the moment)
#             2 mtime
#             3 type field from stat (containing DIR, LINK etc.)
#             4 uid
#             5 gid
#             6 size in bytes
sub remembercandidate($$$$$$$) {
  my ($fname, undef, $mtime, $mode, $uid, $gid, $size) = @_;
  # we do not touch roots files or directories.
  return if (($uid == 0) || ($gid == 0));
  # Anything modified after this point in time is too new to be a candidate.
  my $newestallowed = time() - ($minage * 3600);
  return if ($mtime > $newestallowed);
  $totalfilesseen++;
  push(@allentries, [ $fname, $mtime, $mode, $uid, $gid, $size ]);
}
45
# Walks the tile directory tree and hands every metatile found to
# remembercandidate(). The layout expected below the start path is
# <zoom>/<n>/<n>/<n>/<n>/<n>.meta: on recursion levels 0 to 4 only directories
# with purely numeric names are followed, on level 5 only regular files called
# <number>.meta are considered. Symlinks are never followed. On level 0, only
# zoom levels from $zoommin to $zoommax are descended into. If $rmemptydirs is
# set, directories below the zoom level directories that turn out to be
# completely empty are removed.
# Par. 0: Path where we start
# Par. 1: Recursion counter (0 for the top of the tile tree)
# Returns: Number of files/dirs found (non-recursively!). '.' and '..' are not
#          counted, entries that get skipped are.
# Exits the whole program if a directory cannot be opened.
sub runrecursive($$);
sub runrecursive($$) {
  my ($pth, $recctr) = @_;
  my $direntries = 0;
  my $DIR;
  unless (opendir($DIR, $pth)) {
    print("ERROR: Failed to open dir $pth\n");
    exit(1);
  }
  # The per-entry variables have to be lexical: this function calls itself
  # from inside the loop, and package globals would get overwritten by the
  # recursion before we are done using them.
  while (defined(my $entryname = readdir($DIR))) {
    if (($entryname eq '.') || ($entryname eq '..')) {
      next;
    }
    $direntries++;
    my $entrypath = $pth.'/'.$entryname;
    my @statres = lstat($entrypath);
    unless (@statres > 12) {
      # lstat failed (it returns an empty list then) - nothing we can do
      # with this entry.
      next;
    }
    if (S_ISLNK($statres[2])) { next; } # A symlink? We no like.
    if ($recctr <= 4) { # There should be no files on these levels
      unless (S_ISDIR($statres[2])) { next; }
      unless ($entryname =~ m/^\d+$/) { next; }
    } else { # whereas there should be only files called .meta on level 5.
      unless (S_ISREG($statres[2])) { next; }
      unless ($entryname =~ m/^\d+\.meta$/) { next; }
    }
    if ($recctr == 0) {
      # Top level: the directory name is the zoom level.
      my $z = int($entryname);
      unless (($z >= $zoommin) && ($z <= $zoommax)) { next; }
      runrecursive($entrypath, $recctr+1);
    } elsif ($recctr <= 4) {
      if (runrecursive($entrypath, $recctr+1) == 0) { # Empty dir!
        if ($rmemptydirs) {
          print("RMDIR: $entrypath\n");
          unless (rmdir($entrypath)) {
            print("ERROR: rmdir for $entrypath failed: $!\n");
          }
        }
      }
    } else {
      # stat fields: 8 atime, 9 mtime, 2 mode, 4 uid, 5 gid, 12 blocks
      remembercandidate($entrypath, $statres[8], $statres[9], $statres[2], $statres[4], $statres[5], $statres[12]);
    }
  }
  closedir($DIR);
  return $direntries;
}
100
# Requests rerendering of one metatile by running the command in $rrs with
# the parameters "z x y". Before that, the file is sanity checked: z, x and y
# are calculated from the file path and have to match the values stored in
# the header of the metatile, which also has to carry the magic 'META' and a
# tile count of 64 (8x8). If any check fails, a warning is printed and the
# file is skipped.
# Par. 0: Full path of file
sub rerenderfile($) {
  my $fpth = $_[0];
  # We first need to figure out the relevant numbers from the full file path
  unless ($fpth =~ m!/(\d+)/(\d+)/(\d+)/(\d+)/(\d+)/(\d+)\.meta$!) {
    print("WARNING: rerenderfile: Failed to extract path components for rerendering out of '$fpth' - skipping\n");
    return;
  }
  my $zl = $1; my @p = ( $2, $3, $4, $5, $6 );
  # Every path component carries 4 bits of x in its upper and 4 bits of y in
  # its lower nibble, most significant component first.
  my $calcx = 0; my $calcy = 0;
  foreach my $comp (@p) {
    $calcx = ($calcx << 4) | (($comp & 0xf0) >> 4);
    $calcy = ($calcy << 4) | ($comp & 0x0f);
  }
  # struct meta_layout {
  #   char magic[4]; // 'M' 'E' 'T' 'A'
  #   int count;     // METATILE ^ 2
  #   int x, y, z;   // lowest x,y of this metatile, plus z
  my $fi;
  unless (open($fi, '<', $fpth)) {
    print("WARNING: rerenderfile: Failed to open file '$fpth' - skipping\n");
    return;
  }
  binmode($fi);
  my $dbuf;
  my $bytesread = read($fi, $dbuf, 20);
  close($fi); undef($fi);
  # A header that is not completely there is as bad as a failed read.
  unless (defined($bytesread) && ($bytesread == 20)) {
    print("WARNING: rerenderfile: Failed to read info from metatile '$fpth' - skipping\n");
    return;
  }
  # The four ints are decoded as 32 bit little endian values.
  my ($magic, $count, $fx, $fy, $fz) = unpack('a4 V4', $dbuf);
  unless ($magic eq 'META') {
    print("WARNING: rerenderfile: file '$fpth' is not a metatile - skipping\n");
    return;
  }
  unless ($count == 64) {
    print("WARNING: rerenderfile: file '$fpth' is not a 8x8 metatile - skipping\n");
    return;
  }
  unless ($fx == $calcx) {
    print("WARNING: rerenderfile: file '$fpth' is invalid - xsize $fx != $calcx - skipping\n");
    return;
  }
  unless ($fy == $calcy) {
    print("WARNING: rerenderfile: file '$fpth' is invalid - ysize $fy != $calcy - skipping\n");
    return;
  }
  unless ($fz == $zl) {
    print("WARNING: rerenderfile: file '$fpth' is invalid - zsize $fz != $zl - skipping\n");
    return;
  }
  print("Sending rendering request for z=$zl x=$calcx y=$calcy to regenerate '$fpth'\n");
  # $rrs is deliberately run through the shell so that it may contain
  # arguments of its own; the other values are purely numeric.
  if (system("$rrs $zl $calcx $calcy")) {
    print("Error executing $rrs $zl $calcx $calcy\n");
  }
}
163
# Calculates the five path components under which a metatile is stored from
# its coordinates. Each component packs 4 bits of x (upper nibble) and 4 bits
# of y (lower nibble); the least significant bits go into the last component.
# Par. 0: x
# Par. 1: y
# Returns: the five components joined with '/' (no zoom level, no '.meta')
sub calcpathfromcomponents($$) {
  my ($x, $y) = @_;
  my @parts = ();
  foreach my $step (1 .. 5) {
    # Build the path back to front, consuming 4 bits of x and y per round.
    unshift(@parts, sprintf("%d", (($x & 0x0f) << 4) + ($y & 0x0f)));
    $x >>= 4;
    $y >>= 4;
  }
  return join('/', @parts);
}
176
# Handles the actions rerenderexpiredlist (3) and touchexpiredlist (4).
# Reads a list of expired tiles from STDIN, one "z/x/y" per line; lines in
# any other format are silently ignored, entries with a zoom level other than
# $zoommax - 3 are ignored with a message. For every accepted tile, the
# metatile covering it is determined on each zoom level from $zoommin to
# $zoommax. Every one of these metatiles that exists below $fspath then is
# either requested for rerendering through $rrs (action 3), or gets its mtime
# set back by $setbackinterval (action 4) unless it already is older than
# that. A summary is printed at the end.
# Exits the program if $action is neither 3 nor 4.
sub dohandleexpiredlist() {
  %rerenderlist = ();
  while (defined(my $ll = <STDIN>)) {
    unless ($ll =~ m!^(\d+)/(\d+)/(\d+)$!) { next; }
    my $z = $1; my $x = $2; my $y = $3;
    if ($z != ($zoommax - 3)) {
      print("Ignoring z=$z x=$x y=$y because of wrong zoom\n");
      next;
    }
    for (my $curz = $zoommin; $curz <= $zoommax; $curz++) {
      # Scale the coordinates to zoom level $curz, one level at a time.
      my $cz = $z; my $cx = $x; my $cy = $y;
      while ($cz < $curz) {
        $cz++; $cx <<= 1; $cy <<= 1;
      }
      while ($cz > $curz) {
        $cz--; $cx >>= 1; $cy >>= 1;
      }
      # Round down to the start of the 8x8 metatile. This must not be done
      # with a mask like 0xfff8: that also cuts off everything above bit 15
      # and thus selects a wrong metatile for coordinates >= 65536, which
      # do occur from zoom level 17 on.
      $cx -= $cx % 8; $cy -= $cy % 8;
      $rerenderlist{$cz}{$cx}{$cy} = 1;
    }
  }
  my $filesdone = 0; my $filesseen = 0; my $filesalreadytouched = 0;
  foreach my $z (sort { $a <=> $b } keys(%rerenderlist)) {
    foreach my $x (sort { $a <=> $b } keys(%{$rerenderlist{$z}})) {
      foreach my $y (sort { $a <=> $b } keys(%{$rerenderlist{$z}{$x}})) {
        my $p = ${fspath} . '/' . $z . '/' . calcpathfromcomponents($x, $y) . '.meta';
        $filesseen++;
        unless (-e $p) { next; } # Only metatiles that exist are of interest
        if ($action == 3) {
          $filesdone++;
          print("Sending rendering request for z=$z x=$x y=$y to regenerate '$p'\n");
          if (system("$rrs $z $x $y")) {
            print("Error executing $rrs $z $x $y\n");
          }
        } elsif ($action == 4) {
          my $mtime = (stat($p))[9];
          unless (defined($mtime)) {
            # The file vanished or became inaccessible since the -e check.
            print("Error touching '$p': $!\n");
            next;
          }
          my $curtime = time();
          if (($curtime - $setbackinterval) > $mtime) {
            # Do not touch again - it's already 20 years back.
            $filesalreadytouched++;
          } else {
            $filesdone++;
            my $newmtime = $mtime - $setbackinterval;
            print("Touching '$p' (z=$z x=$x y=$y)\n");
            # atime is set to now, mtime is moved into the past.
            if (utime($curtime, $newmtime, $p) < 1) {
              print("Error touching '$p': $!\n");
            }
          }
        } else {
          print("Internal error - action variable invalid. This is a programming error.\n");
          exit(1);
        }
      }
    }
  }
  if ($action == 3) {
    print("Sent re-rendering-requests for $filesdone files that actually existed (of $filesseen candidates)\n");
  } elsif ($action == 4) {
    print("Touched $filesdone files ($filesseen candidates, $filesalreadytouched already touched)\n");
  }
}
247
# Command line parsing. Everything that is not a known option is taken as the
# directory to work on; exactly one such directory has to be given. Any error
# in the parameters terminates the program with exit code 1.
$fspath = '';
for ($i = 0; $i < @ARGV; $i++) {
  $curarg = $ARGV[$i];
  if ($curarg eq '--help') {
    print("Syntax: $0 [--help] [--zoom z] [--limit n] [--action a] [--rmemptydirs]\n");
    print(" [--rrs path] [--minage hrs] directory\n");
    print(" --zoom z[-z2]: Zoom level(s) to handle (default: 0-20).\n");
    print(" --limit n: Limit to the n oldest files (default: no limit)\n");
    print(" --action what: what action to perform with the found files.\n");
    print(" valid actions are: print delete rerender rerenderexpiredlist\n");
    print(" touchexpiredlist (default: print)\n");
    print(" rerenderexpiredlist and touchexpiredlist: these are very different from the\n");
    print(" other commands, as they expect to read a list of expired tiles to rerender\n");
    print(" or touch on STDIN. The list has to be in the format that osm2pgsql spits\n");
    print(" out. touchexpiredlist sets the mtime of the tiles back 20 years.\n");
    print(" --rmemptydirs: remove empty subdirectories\n");
    print(" --rrs path: if action is rerender, this gives the path to the rerender script.\n");
    print(" it gets called with three parameters: z x y\n");
    print(" --minage hrs: only care about files that are at least hrs hours old\n");
    exit(1);
  } elsif ($curarg eq '--zoom') {
    $i++; # The value of the option is the next argument
    if ($i >= @ARGV) {
      print("Error: --zoom requires a parameter\n");
      exit(1);
    } else {
      if ($ARGV[$i] =~ m/^(\d+)-(\d+)$/) {
        $zoommin = $1; $zoommax = $2;
      } elsif ($ARGV[$i] =~ m/^(\d+)$/) {
        $zoommin = $1; $zoommax = $1;
      } else {
        print("Error: --zoom needs a single numeric parameter or a range (e.g. 4 or 10-12)\n");
        exit(1);
      }
    }
  } elsif ($curarg eq '--limit') {
    $i++;
    if ($i >= @ARGV) {
      print("Error: --limit requires a parameter\n");
      exit(1);
    } else {
      if ($ARGV[$i] =~ m/^(\d+)$/) {
        $filelimit = $1;
      } else {
        print("Error: --limit requires an positive integer as parameter.\n");
        # Do not carry on without the limit the user asked for - that
        # could e.g. delete far more files than intended.
        exit(1);
      }
    }
  } elsif ($curarg eq '--action') {
    $i++;
    if ($i >= @ARGV) {
      print("Error: --action requires a parameter\n");
      exit(1);
    } else {
      if ($ARGV[$i] eq 'print') {
        $action = 0;
      } elsif ($ARGV[$i] eq 'delete') {
        $action = 1;
      } elsif ($ARGV[$i] eq 'rerender') {
        $action = 2;
      } elsif ($ARGV[$i] eq 'rerenderexpiredlist') {
        $action = 3;
      } elsif ($ARGV[$i] eq 'touchexpiredlist') {
        $action = 4;
      } else {
        print("Error: Invalid action selected.\n");
        exit(1);
      }
    }
  } elsif ($curarg eq '--rrs') {
    $i++;
    if ($i >= @ARGV) {
      print("Error: --rrs requires a parameter\n");
      exit(1);
    } else {
      $rrs = $ARGV[$i];
    }
  } elsif ($curarg eq '--minage') {
    $i++;
    if ($i >= @ARGV) {
      print("Error: --minage requires a parameter\n");
      exit(1);
    } else {
      if ($ARGV[$i] =~ m/^(\d+)$/) {
        $minage = $1;
      } else {
        print("Error: --minage requires an positive integer as parameter.\n");
        # Without a valid minimum age, files that are too new would be
        # handled as well, so stop here.
        exit(1);
      }
    }
  } elsif ($curarg eq '--rmemptydirs') {
    $rmemptydirs = 1;
  } else {
    if ($fspath eq '') {
      $fspath = $curarg;
    } else {
      print("Too many parameters, or parameter(s) not understood.\n");
      print("I can only handle one directory parameter, but I consider both ");
      print("'$curarg' and '$fspath' a pathname since they are not a known parameter.\n");
      exit(1);
    }
  }
}
if ($fspath eq '') {
  print("ERROR: No path to clear given.\n");
  exit(1);
}
# The two "expiredlist" actions work on a tile list read from STDIN instead
# of scanning the directory tree, so they are handled completely separately.
if (($action == 3) || ($action == 4)) {
  dohandleexpiredlist();
  exit(0);
}
# Collect all candidate files below $fspath and sort them, oldest first.
@allentries = ();
$totalfilesseen = 0;
runrecursive($fspath, 0);
@allentries = sort sortbydateasc @allentries;
print(scalar(@allentries)." files seen\n");
# Now perform the selected action on the candidates until the file limit (if
# there is one) has been reached.
$filesdone = 0;
foreach my $entry (@allentries) {
  if (($filelimit > 0) && ($filesdone >= $filelimit)) {
    print("File limit $filelimit reached, exiting.\n");
    exit(0);
  }
  my ($fname, $mtime) = @{$entry}[0, 1];
  if ($action == 0) { # Print
    print("$fname\n");
  } elsif ($action == 1) { # Delete
    my $mtimestr = strftime("%Y-%m-%d.%H:%M:%S", localtime($mtime));
    print("DELETING $fname (mtime " . $mtimestr . ")\n");
    unless (unlink($fname)) {
      print("ERROR: rm for $fname failed!\n");
    }
  } elsif ($action == 2) { # Rerender
    rerenderfile($fname);
  }
  $filesdone++;
}
This page took 0.119397 seconds and 4 git commands to generate.