Optimize away most expensive file prune calls, when refreshing
Benchmarking refresh of a wiki with 25 thousand pages showed file_pruned() using most of the time. But, when refreshing, ikiwiki already knows about nearly all the files. So we can skip calling file_pruned() for those it knows about. While tricky to do, this sped up a refresh (that otherwise does no work) by 10-50%.
parent
4b8ca7cfc1
commit
3bb00d142a
17
IkiWiki.pm
17
IkiWiki.pm
|
@ -32,7 +32,6 @@ our $installdir='/usr'; # INSTALLDIR_AUTOREPLACE done by Makefile, DNE
|
|||
use Memoize;
|
||||
memoize("abs2rel");
|
||||
memoize("pagespec_translate");
|
||||
memoize("file_pruned");
|
||||
memoize("template_file");
|
||||
|
||||
sub getsetup () {
|
||||
|
@ -1770,14 +1769,18 @@ sub add_depends ($$) {
|
|||
return 1;
|
||||
}
|
||||
|
||||
sub file_pruned ($$) {
|
||||
require File::Spec;
|
||||
my $file=File::Spec->canonpath(shift);
|
||||
my $base=File::Spec->canonpath(shift);
|
||||
$file =~ s#^\Q$base\E/+##;
|
||||
sub file_pruned ($;$) {
|
||||
my $file=shift;
|
||||
if (@_) {
|
||||
require File::Spec;
|
||||
$file=File::Spec->canonpath($file);
|
||||
my $base=File::Spec->canonpath(shift);
|
||||
return if $file eq $base;
|
||||
$file =~ s#^\Q$base\E/+##;
|
||||
}
|
||||
|
||||
my $regexp='('.join('|', @{$config{wiki_file_prune_regexps}}).')';
|
||||
return $file =~ m/$regexp/ && $file ne $base;
|
||||
return $file =~ m/$regexp/;
|
||||
}
|
||||
|
||||
sub define_gettext () {
|
||||
|
|
|
@ -279,24 +279,26 @@ sub find_src_files () {
|
|||
find({
|
||||
no_chdir => 1,
|
||||
wanted => sub {
|
||||
$_=decode_utf8($_);
|
||||
if (file_pruned($_, $config{srcdir})) {
|
||||
my $file=decode_utf8($_);
|
||||
$file=~s/^\Q$config{srcdir}\E\/?//;
|
||||
my $page = pagename($file);
|
||||
if (! exists $pagesources{$page} &&
|
||||
file_pruned($file)) {
|
||||
$File::Find::prune=1;
|
||||
return;
|
||||
}
|
||||
elsif (! -l $_ && ! -d _) {
|
||||
my ($f)=/$config{wiki_file_regexp}/; # untaint
|
||||
if (! defined $f) {
|
||||
warn(sprintf(gettext("skipping bad filename %s"), $_)."\n");
|
||||
}
|
||||
else {
|
||||
$f=~s/^\Q$config{srcdir}\E\/?//;
|
||||
push @files, $f;
|
||||
my $pagename = pagename($f);
|
||||
if ($pages{$pagename}) {
|
||||
debug(sprintf(gettext("%s has multiple possible source pages"), $pagename));
|
||||
}
|
||||
$pages{$pagename}=1;
|
||||
return if -l $_ || -d _ || ! length $file;
|
||||
|
||||
my ($f) = $file =~ /$config{wiki_file_regexp}/; # untaint
|
||||
if (! defined $f) {
|
||||
warn(sprintf(gettext("skipping bad filename %s"), $file)."\n");
|
||||
}
|
||||
else {
|
||||
push @files, $f;
|
||||
if ($pages{$page}) {
|
||||
debug(sprintf(gettext("%s has multiple possible source pages"), $page));
|
||||
}
|
||||
$pages{$page}=1;
|
||||
}
|
||||
},
|
||||
}, $config{srcdir});
|
||||
|
@ -304,27 +306,28 @@ sub find_src_files () {
|
|||
find({
|
||||
no_chdir => 1,
|
||||
wanted => sub {
|
||||
$_=decode_utf8($_);
|
||||
if (file_pruned($_, $dir)) {
|
||||
my $file=decode_utf8($_);
|
||||
$file=~s/^\Q$dir\E\/?//;
|
||||
my $page=pagename($file);
|
||||
if (! exists $pagesources{$page} &&
|
||||
file_pruned($file)) {
|
||||
$File::Find::prune=1;
|
||||
return;
|
||||
}
|
||||
elsif (! -l $_ && ! -d _) {
|
||||
my ($f)=/$config{wiki_file_regexp}/; # untaint
|
||||
if (! defined $f) {
|
||||
warn(sprintf(gettext("skipping bad filename %s"), $_)."\n");
|
||||
}
|
||||
else {
|
||||
$f=~s/^\Q$dir\E\/?//;
|
||||
# avoid underlaydir
|
||||
# override attacks; see
|
||||
# security.mdwn
|
||||
if (! -l "$config{srcdir}/$f" &&
|
||||
! -e _) {
|
||||
my $page=pagename($f);
|
||||
if (! $pages{$page}) {
|
||||
push @files, $f;
|
||||
$pages{$page}=1;
|
||||
}
|
||||
return if -l $_ || -d _ || ! length $file;
|
||||
|
||||
my ($f) = $file =~ /$config{wiki_file_regexp}/; # untaint
|
||||
if (! defined $f) {
|
||||
warn(sprintf(gettext("skipping bad filename %s"), $file)."\n");
|
||||
}
|
||||
else {
|
||||
# avoid underlaydir override
|
||||
# attacks; see security.mdwn
|
||||
if (! -l "$config{srcdir}/$f" &&
|
||||
! -e _) {
|
||||
if (! $pages{$page}) {
|
||||
push @files, $f;
|
||||
$pages{$page}=1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -10,6 +10,8 @@ ikiwiki (3.14159266) UNRELEASED; urgency=low
|
|||
* mirrorlist: Display nothing if list is empty.
|
||||
* Fix a bug that could lead to duplicate links being recorded
|
||||
for tags.
|
||||
* Optimize away most expensive file prune calls, when refreshing,
|
||||
by only checking new files.
|
||||
|
||||
-- Joey Hess <joeyh@debian.org> Sun, 27 Sep 2009 17:40:03 -0400
|
||||
|
||||
|
|
Loading…
Reference in New Issue