instead of over and over. Typical speedup is ~4x. Max possible speedup: 8x.
* Add "scan" parameter to hook(), which is used to make the hook be called
  during the scanning pass, as well as the render pass. The meta and tag
  plugins need to use the new scan parameter, so will any others that modify
  %links.
* Now that links are calculated in a separate pass, it can also 
  precalculate backlinks in one pass, which is O(N^2) instead of the
  previous code that was O(N^3). A very nice speedup for wikis with lots
  (thousands) of pages.
master
joey 2006-10-28 05:07:56 +00:00
parent 49bf877701
commit db3b72c482
10 changed files with 95 additions and 57 deletions

View File

@ -446,10 +446,11 @@ sub linkify ($$$) { #{{{
} #}}} } #}}}
my %preprocessing; my %preprocessing;
sub preprocess ($$$) { #{{{ sub preprocess ($$$;$) { #{{{
my $page=shift; # the page the data comes from my $page=shift; # the page the data comes from
my $destpage=shift; # the page the data will appear in (different for inline) my $destpage=shift; # the page the data will appear in (different for inline)
my $content=shift; my $content=shift;
my $scan=shift;
my $handle=sub { my $handle=sub {
my $escape=shift; my $escape=shift;
@ -459,6 +460,7 @@ sub preprocess ($$$) { #{{{
return "[[$command $params]]"; return "[[$command $params]]";
} }
elsif (exists $hooks{preprocess}{$command}) { elsif (exists $hooks{preprocess}{$command}) {
return "" if $scan && ! $hooks{preprocess}{$command}{scan};
# Note: preserve order of params, some plugins may # Note: preserve order of params, some plugins may
# consider it significant. # consider it significant.
my @params; my @params;

View File

@ -13,7 +13,7 @@ my %author;
my %authorurl; my %authorurl;
sub import { #{{{ sub import { #{{{
hook(type => "preprocess", id => "meta", call => \&preprocess); hook(type => "preprocess", id => "meta", call => \&preprocess, scan => 1);
hook(type => "filter", id => "meta", call => \&filter); hook(type => "filter", id => "meta", call => \&filter);
hook(type => "pagetemplate", id => "meta", call => \&pagetemplate); hook(type => "pagetemplate", id => "meta", call => \&pagetemplate);
} # }}} } # }}}

View File

@ -10,7 +10,7 @@ my %tags;
sub import { #{{{ sub import { #{{{
hook(type => "getopt", id => "tag", call => \&getopt); hook(type => "getopt", id => "tag", call => \&getopt);
hook(type => "preprocess", id => "tag", call => \&preprocess); hook(type => "preprocess", id => "tag", call => \&preprocess, scan => 1);
hook(type => "pagetemplate", id => "tag", call => \&pagetemplate); hook(type => "pagetemplate", id => "tag", call => \&pagetemplate);
} # }}} } # }}}

View File

@ -7,14 +7,30 @@ use strict;
use IkiWiki; use IkiWiki;
use Encode; use Encode;
my %backlinks;
my $backlinks_calculated=0;
# Rebuild the %backlinks cache from scratch using the link lists in %links.
# Takes no arguments; its effect is to repopulate %backlinks and to set
# $backlinks_calculated so that backlinks() does not redo this work on
# every call.
# Resulting shape: $backlinks{$target}{$linking_page} = 1.
sub calculate_backlinks () { #{{{
# Discard any previously computed data before rebuilding.
%backlinks=();
foreach my $page (keys %links) {
foreach my $link (@{$links{$page}}) {
# Resolve the link as written on $page to a concrete page name.
# NOTE(review): bestlink() is defined elsewhere; presumably it returns
# an empty string when no page matches -- confirm.
my $bestlink=bestlink($page, $link);
# Skip links that resolve to nothing, and links from a page to itself.
if (length $bestlink && $bestlink ne $page) {
$backlinks{$bestlink}{$page}=1;
}
}
}
# Mark the cache as valid.
$backlinks_calculated=1;
} #}}}
sub backlinks ($) { #{{{ sub backlinks ($) { #{{{
my $page=shift; my $page=shift;
my @links; calculate_backlinks() unless $backlinks_calculated;
foreach my $p (keys %links) {
next if bestlink($page, $p) eq $page;
if (grep { length $_ && bestlink($p, $_) eq $page } @{$links{$p}}) { my @links;
return unless $backlinks{$page};
foreach my $p (keys %{$backlinks{$page}}) {
my $href=abs2rel(htmlpage($p), dirname($page)); my $href=abs2rel(htmlpage($p), dirname($page));
# Trim common dir prefixes from both pages. # Trim common dir prefixes from both pages.
@ -28,7 +44,6 @@ sub backlinks ($) { #{{{
push @links, { url => $href, page => pagetitle($p_trimmed) }; push @links, { url => $href, page => pagetitle($p_trimmed) };
} }
}
return sort { $a->{page} cmp $b->{page} } @links; return sort { $a->{page} cmp $b->{page} } @links;
} #}}} } #}}}
@ -128,6 +143,11 @@ sub scan ($) { #{{{
my $srcfile=srcfile($file); my $srcfile=srcfile($file);
my $content=readfile($srcfile); my $content=readfile($srcfile);
my $page=pagename($file); my $page=pagename($file);
will_render($page, htmlpage($page), 1);
# Always needs to be done, since filters might add links
# to the content.
$content=filter($page, $content);
my @links; my @links;
while ($content =~ /(?<!\\)$config{wiki_link_regexp}/g) { while ($content =~ /(?<!\\)$config{wiki_link_regexp}/g) {
@ -139,6 +159,9 @@ sub scan ($) { #{{{
push @links, "$page/discussion"; push @links, "$page/discussion";
} }
$links{$page}=\@links; $links{$page}=\@links;
# Preprocess in scan-only mode.
preprocess($page, $page, $content, 1);
} }
} #}}} } #}}}
@ -240,7 +263,6 @@ sub refresh () { #{{{
my $page=pagename($file); my $page=pagename($file);
if (! $oldpagemtime{$page}) { if (! $oldpagemtime{$page}) {
push @add, $file; push @add, $file;
scan($file);
$pagecase{lc $page}=$page; $pagecase{lc $page}=$page;
$pagesources{$page}=$file; $pagesources{$page}=$file;
if ($config{getctime} && -e "$config{srcdir}/$file") { if ($config{getctime} && -e "$config{srcdir}/$file") {
@ -265,7 +287,8 @@ sub refresh () { #{{{
} }
} }
# scan updated files to update info about them # scan changed and new files
my @changed;
foreach my $file (@files) { foreach my $file (@files) {
my $page=pagename($file); my $page=pagename($file);
@ -273,22 +296,17 @@ sub refresh () { #{{{
mtime(srcfile($file)) > $oldpagemtime{$page} || mtime(srcfile($file)) > $oldpagemtime{$page} ||
$forcerebuild{$page}) { $forcerebuild{$page}) {
debug("scanning $file"); debug("scanning $file");
push @changed, $file;
scan($file); scan($file);
} }
} }
# render any updated files # render changed and new pages
foreach my $file (@files) { foreach my $file (@changed) {
my $page=pagename($file);
if (! exists $oldpagemtime{$page} ||
mtime(srcfile($file)) > $oldpagemtime{$page} ||
$forcerebuild{$page}) {
debug("rendering $file"); debug("rendering $file");
render($file); render($file);
$rendered{$file}=1; $rendered{$file}=1;
} }
}
# if any files were added or removed, check to see if each page # if any files were added or removed, check to see if each page
# needs an update due to linking to them or inlining them # needs an update due to linking to them or inlining them
@ -310,9 +328,8 @@ FILE: foreach my $file (@files) {
} }
} }
# Handle backlinks; if a page has added/removed links, update the
# pages it links to. Also handles rebuilding dependant pages.
if (%rendered || @del) { if (%rendered || @del) {
# rebuild dependant pages
foreach my $f (@files) { foreach my $f (@files) {
next if $rendered{$f}; next if $rendered{$f};
my $p=pagename($f); my $p=pagename($f);
@ -330,6 +347,8 @@ FILE: foreach my $file (@files) {
} }
} }
# handle backlinks; if a page has added/removed links,
# update the pages it links to
my %linkchanged; my %linkchanged;
foreach my $file (keys %rendered, @del) { foreach my $file (keys %rendered, @del) {
my $page=pagename($file); my $page=pagename($file);
@ -364,7 +383,7 @@ FILE: foreach my $file (@files) {
} }
} }
# Remove no longer rendered files. # remove no longer rendered files
foreach my $src (keys %rendered) { foreach my $src (keys %rendered) {
my $page=pagename($src); my $page=pagename($src);
foreach my $file (@{$oldrenderedfiles{$page}}) { foreach my $file (@{$oldrenderedfiles{$page}}) {

8
debian/NEWS vendored
View File

@ -1,3 +1,11 @@
ikiwiki (1.32) unstable; urgency=low
There is a change to the plugin interface in this version. Any plugins that
modify data in %links should pass scan => 1 when registering the hook that
does so.
-- Joey Hess <joeyh@debian.org> Sat, 28 Oct 2006 00:13:12 -0400
ikiwiki (1.29) unstable; urgency=low ikiwiki (1.29) unstable; urgency=low
Wikis need to be rebuilt on upgrade to this version. If you listed your wiki Wikis need to be rebuilt on upgrade to this version. If you listed your wiki

15
debian/changelog vendored
View File

@ -1,11 +1,18 @@
ikiwiki (1.32) UNRELEASED; urgency=low ikiwiki (1.32) UNRELEASED; urgency=low
* Add a separate pass to find page links, and only render each page once, * Add a separate pass to find page links, and only render each page once,
instead of over and over. This is up to 8 times faster than before! instead of over and over. Typical speedup is ~4x. Max possible speedup:
(This could have introduced some subtle bugs, so it needs to be tested 8x.
extensively.) * Add "scan" parameter to hook(), which is used to make the hook be called
during the scanning pass, as well as the render pass. The meta and tag
plugins need to use the new scan parameter, so will any others that modify
%links.
* Now that links are calculated in a separate pass, it can also
precalculate backlinks in one pass, which is O(N^2) instead of the
previous code that was O(N^3). A very nice speedup for wikis with lots
(thousands) of pages.
-- Joey Hess <joeyh@debian.org> Fri, 27 Oct 2006 23:21:35 -0400 -- Joey Hess <joeyh@debian.org> Fri, 27 Oct 2006 23:27:29 -0400
ikiwiki (1.31) unstable; urgency=low ikiwiki (1.31) unstable; urgency=low

View File

@ -1,5 +1,5 @@
[[template id=plugin name=googlemaps author="Christian Mock"]] [[template id=plugin name=googlemaps author="Christian Mock"]]
[[tag special-purpose]] [[tag type/special-purpose]]
[[meta title="googlemaps (third-party plugin)"]] [[meta title="googlemaps (third-party plugin)"]]
`googlemaps` is a plugin that allows using the [Google Maps API][2] `googlemaps` is a plugin that allows using the [Google Maps API][2]

View File

@ -30,6 +30,12 @@ hook, an "id" parameter, which should be a unique string for this plugin, and
a "call" parameter, which is a reference to a function to call for the a "call" parameter, which is a reference to a function to call for the
hook. hook.
An optional "scan" parameter, if set to a true value, makes the hook be
called during the preliminary scan that ikiwiki makes of updated pages,
before beginning to render pages. This parameter should be set to true if
the hook modifies data in `%links`. Note that doing so will make the hook
be run twice per page build, so avoid doing it for expensive hooks.
## Types of hooks ## Types of hooks
In roughly the order they are called. In roughly the order they are called.
@ -64,6 +70,14 @@ Runs on the raw source of a page, before anything else touches it, and can
make arbitrary changes. The function is passed named parameters `page` and make arbitrary changes. The function is passed named parameters `page` and
`content` and should return the filtered content. `content` and should return the filtered content.
### scan
hook(type => "scan", id => "foo", call => \&scan);
This is identical to a preprocess hook (see below), except that it is
called in the initial pass that scans pages for data that will be used in
later passes. Scan hooks are the only hooks that should modify `%links`.
### preprocess ### preprocess
Adding a [[PreProcessorDirective]] is probably the most common use of a Adding a [[PreProcessorDirective]] is probably the most common use of a

View File

@ -18,7 +18,7 @@ Released 29 April 2006.
* [[Tags]] _(status: fair)_ * [[Tags]] _(status: fair)_
* Should have fully working [[todo/utf8]] support. _(status: good)_ * Should have fully working [[todo/utf8]] support. _(status: good)_
* [[Optimised_rendering|todo/optimisations]] if possible. Deal with other * [[Optimised_rendering|todo/optimisations]] if possible. Deal with other
scalability issues. _(status: something like 9x speedup 1.0!)_ scalability issues. _(status: should be faster, need to get numbers)_
* Improved [[todo/html]] stylesheets and templates. * Improved [[todo/html]] stylesheets and templates.
* Improved scalable [[logo]]. _(status: done)_ * Improved scalable [[logo]]. _(status: done)_
* Support for other revision control systems aside from svn. * Support for other revision control systems aside from svn.

View File

@ -4,18 +4,6 @@
* Look at splitting up CGI.pm. But note that too much splitting can slow * Look at splitting up CGI.pm. But note that too much splitting can slow
perl down. perl down.
* The backlinks code turns out to scale badly to wikis with thousands of * The backlinks calculation code is still O(N^2) on the number of pages.
pages. The code is O(N^2)! It's called for each page, and it loops If backlinks info were stored in the index file, it would go down to
through all the pages to find backlinks. constant time for iterative builds, though still N^2 for rebuilds.
Need to find a way to calculate and cache all the backlinks in one pass,
which could be done in at worst O(N), and possibly less (if they're
stored in the index, it could be constant time). But to do this, there
would need to be a way to invalidate or update the cache in these
situations:
- A page is added. Note that this can change a backlink to point to
the new page instead of the page it pointed to before.
- A page is deleted. This can also change backlinks that pointed to that
page.
- A page is modified. Links added/removed.