optimise brokenlinks by gathering the data when calculating backlinks

During backlink calculation, all links are examined and broken links can
be detected for free, so store a list of broken links and have brokenlinks
use it.

Exposing the %brokenlinks structure is a bit ugly, but the speedup seems
worth it: Around 1 second for wikis the size of the doc wiki that use
brokenlinks.
master
Joey Hess 2009-08-14 01:11:53 -04:00
parent f486271009
commit 82bb3af579
3 changed files with 30 additions and 28 deletions

View File

@ -27,31 +27,27 @@ sub preprocess (@) {
# register a dependency. # register a dependency.
add_depends($params{page}, $params{pages}); add_depends($params{page}, $params{pages});
my %broken;
foreach my $page (pagespec_match_list([keys %links],
$params{pages}, location => $params{page})) {
my %seen;
foreach my $link (@{$links{$page}}) {
next if $seen{$link};
$seen{$link}=1;
next if $link =~ /.*\/\Q$config{discussionpage}\E/i && $config{discussion};
my $bestlink=bestlink($page, $link);
next if length $bestlink;
push @{$broken{$link}}, $page;
}
}
my @broken; my @broken;
foreach my $link (keys %broken) { foreach my $link (keys %IkiWiki::brokenlinks) {
my $page=$broken{$link}->[0]; next if $link =~ /.*\/\Q$config{discussionpage}\E/i && $config{discussion};
my @pages;
foreach my $page (@{$IkiWiki::brokenlinks{$link}}) {
push @pages, $page
if pagespec_match($page, $params{pages}, location => $params{page});
}
next unless @pages;
my $page=$IkiWiki::brokenlinks{$link}->[0];
push @broken, sprintf(gettext("%s from %s"), push @broken, sprintf(gettext("%s from %s"),
htmllink($page, $params{destpage}, $link, noimageinline => 1), htmllink($page, $params{destpage}, $link, noimageinline => 1),
join(", ", map { join(", ", map {
htmllink($params{page}, $params{destpage}, $_, noimageinline => 1) htmllink($params{page}, $params{destpage}, $_, noimageinline => 1)
} @{$broken{$link}})); } @pages)
);
} }
return gettext("There are no broken links!") unless %broken; return gettext("There are no broken links!") unless @broken;
return "<ul>\n" return "<ul>\n"
.join("\n", .join("\n",
map { map {

View File

@ -8,26 +8,31 @@ use IkiWiki;
use Encode; use Encode;
my %backlinks; my %backlinks;
my $backlinks_calculated=0; our %brokenlinks;
my $links_calculated=0;
sub calculate_backlinks () { sub calculate_links () {
return if $backlinks_calculated; return if $links_calculated;
%backlinks=(); %backlinks=%brokenlinks=();
foreach my $page (keys %links) { foreach my $page (keys %links) {
foreach my $link (@{$links{$page}}) { foreach my $link (@{$links{$page}}) {
my $bestlink=bestlink($page, $link); my $bestlink=bestlink($page, $link);
if (length $bestlink && $bestlink ne $page) { if (length $bestlink) {
$backlinks{$bestlink}{$page}=1; $backlinks{$bestlink}{$page}=1
if $bestlink ne $page;
}
else {
push @{$brokenlinks{$link}}, $page;
} }
} }
} }
$backlinks_calculated=1; $links_calculated=1;
} }
sub backlink_pages ($) { sub backlink_pages ($) {
my $page=shift; my $page=shift;
calculate_backlinks(); calculate_links();
return keys %{$backlinks{$page}}; return keys %{$backlinks{$page}};
} }
@ -416,7 +421,7 @@ sub refresh () {
debug(sprintf(gettext("scanning %s"), $file)); debug(sprintf(gettext("scanning %s"), $file));
scan($file); scan($file);
} }
calculate_backlinks(); calculate_links();
foreach my $file (@needsbuild) { foreach my $file (@needsbuild) {
debug(sprintf(gettext("building %s"), $file)); debug(sprintf(gettext("building %s"), $file));
render($file); render($file);

3
debian/changelog vendored
View File

@ -5,7 +5,8 @@ ikiwiki (3.141593) UNRELEASED; urgency=low
* inline: Avoid use of my $_ as it fails with older perls. * inline: Avoid use of my $_ as it fails with older perls.
Closes: #541215 Closes: #541215
* Add discussionpage configuration setting. * Add discussionpage configuration setting.
* Small optimisations. * Several optimisations, including speedups to orphans and brokenlinks
calculation.
-- Joey Hess <joeyh@debian.org> Wed, 12 Aug 2009 12:25:30 -0400 -- Joey Hess <joeyh@debian.org> Wed, 12 Aug 2009 12:25:30 -0400