diff --git a/IkiWiki.pm b/IkiWiki.pm
index 80208ef2b..a6869d454 100644
--- a/IkiWiki.pm
+++ b/IkiWiki.pm
@@ -446,10 +446,11 @@ sub linkify ($$$) { #{{{
 } #}}}
 
 my %preprocessing;
-sub preprocess ($$$) { #{{{
+sub preprocess ($$$;$) { #{{{
 	my $page=shift;	# the page the data comes from
 	my $destpage=shift; # the page the data will appear in (different for inline)
 	my $content=shift;
+	my $scan=shift;
 
 	my $handle=sub {
 		my $escape=shift;
@@ -459,6 +460,7 @@ sub preprocess ($$$) { #{{{
 			return "[[$command $params]]";
 		}
 		elsif (exists $hooks{preprocess}{$command}) {
+			return "" if $scan && ! $hooks{preprocess}{$command}{scan};
 			# Note: preserve order of params, some plugins may
 			# consider it significant.
 			my @params;
diff --git a/IkiWiki/Plugin/meta.pm b/IkiWiki/Plugin/meta.pm
index 5bcd65837..2e5fd7e76 100644
--- a/IkiWiki/Plugin/meta.pm
+++ b/IkiWiki/Plugin/meta.pm
@@ -13,7 +13,7 @@ my %author;
 my %authorurl;
 
 sub import { #{{{
-	hook(type => "preprocess", id => "meta", call => \&preprocess);
+	hook(type => "preprocess", id => "meta", call => \&preprocess, scan => 1);
 	hook(type => "filter", id => "meta", call => \&filter);
 	hook(type => "pagetemplate", id => "meta", call => \&pagetemplate);
 } # }}}
diff --git a/IkiWiki/Plugin/tag.pm b/IkiWiki/Plugin/tag.pm
index 7a1be6bec..6d22c49fd 100644
--- a/IkiWiki/Plugin/tag.pm
+++ b/IkiWiki/Plugin/tag.pm
@@ -10,7 +10,7 @@ my %tags;
 
 sub import { #{{{
 	hook(type => "getopt", id => "tag", call => \&getopt);
-	hook(type => "preprocess", id => "tag", call => \&preprocess);
+	hook(type => "preprocess", id => "tag", call => \&preprocess, scan => 1);
 	hook(type => "pagetemplate", id => "tag", call => \&pagetemplate);
 } # }}}
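
To make the new calling convention concrete, a third-party plugin that adds to
%links would now register as in the sketch below. The plugin name, the
directive parameter, and the link target are invented for the example; only
hook(), scan => 1, and %links come from the interface changed above, and the
sketch assumes %links is exported to plugins the same way it is for tag.pm.

    #!/usr/bin/perl
    # Hypothetical plugin: records one link per [[example name=foo]]
    # directive. Since it modifies %links, it registers with scan => 1
    # so the link is seen in the scanning pass, before rendering begins.
    package IkiWiki::Plugin::example;
    
    use warnings;
    use strict;
    use IkiWiki;
    
    sub import { #{{{
    	hook(type => "preprocess", id => "example", call => \&preprocess,
    		scan => 1);
    } # }}}
    
    sub preprocess (@) { #{{{
    	my %params=@_;
    
    	if (exists $params{name}) {
    		# hidden WikiLink, in the same style as the tag plugin
    		push @{$links{$params{page}}}, "example/".$params{name};
    	}
    
    	# returning "" makes the directive produce no visible output
    	return "";
    } # }}}
    
    1

With this registration the hook runs in both the scanning pass and the render
pass, so the links it records exist before any backlinks are computed.
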
diff --git a/IkiWiki/Render.pm b/IkiWiki/Render.pm
index 026b3582e..da5a5510b 100644
--- a/IkiWiki/Render.pm
+++ b/IkiWiki/Render.pm
@@ -7,27 +7,42 @@ use strict;
 use IkiWiki;
 use Encode;
 
+my %backlinks;
+my $backlinks_calculated=0;
+
+sub calculate_backlinks () { #{{{
+	%backlinks=();
+	foreach my $page (keys %links) {
+		foreach my $link (@{$links{$page}}) {
+			my $bestlink=bestlink($page, $link);
+			if (length $bestlink && $bestlink ne $page) {
+				$backlinks{$bestlink}{$page}=1;
+			}
+		}
+	}
+	$backlinks_calculated=1;
+} #}}}
+
 sub backlinks ($) { #{{{
 	my $page=shift;
 
-	my @links;
-	foreach my $p (keys %links) {
-		next if bestlink($page, $p) eq $page;
+	calculate_backlinks() unless $backlinks_calculated;
 
-		if (grep { length $_ && bestlink($p, $_) eq $page } @{$links{$p}}) {
-			my $href=abs2rel(htmlpage($p), dirname($page));
+	my @links;
+	return unless $backlinks{$page};
+	foreach my $p (keys %{$backlinks{$page}}) {
+		my $href=abs2rel(htmlpage($p), dirname($page));
 
-			# Trim common dir prefixes from both pages.
-			my $p_trimmed=$p;
-			my $page_trimmed=$page;
-			my $dir;
-			1 while (($dir)=$page_trimmed=~m!^([^/]+/)!) &&
-			        defined $dir &&
-			        $p_trimmed=~s/^\Q$dir\E// &&
-			        $page_trimmed=~s/^\Q$dir\E//;
-
-			push @links, { url => $href, page => pagetitle($p_trimmed) };
-		}
+		# Trim common dir prefixes from both pages.
+		my $p_trimmed=$p;
+		my $page_trimmed=$page;
+		my $dir;
+		1 while (($dir)=$page_trimmed=~m!^([^/]+/)!) &&
+		        defined $dir &&
+		        $p_trimmed=~s/^\Q$dir\E// &&
+		        $page_trimmed=~s/^\Q$dir\E//;
+
+		push @links, { url => $href, page => pagetitle($p_trimmed) };
 	}
 
 	return sort { $a->{page} cmp $b->{page} } @links;
@@ -128,6 +143,11 @@ sub scan ($) { #{{{
 		my $srcfile=srcfile($file);
 		my $content=readfile($srcfile);
 		my $page=pagename($file);
+		will_render($page, htmlpage($page), 1);
+
+		# Always needs to be done, since filters might add links
+		# to the content.
+		$content=filter($page, $content);
 
 		my @links;
 		while ($content =~ /(?<!\\)$config{wiki_link_regexp}/g) {
[...]
 		if (! exists $oldpagemtime{$page} ||
 		    mtime(srcfile($file)) > $oldpagemtime{$page} ||
 		    $forcerebuild{$page}) {
 			debug("scanning $file");
+			push @changed, $file;
 			scan($file);
 		}
 	}
 
-	# render any updated files
-	foreach my $file (@files) {
-		my $page=pagename($file);
-
-		if (! exists $oldpagemtime{$page} ||
-		    mtime(srcfile($file)) > $oldpagemtime{$page} ||
-		    $forcerebuild{$page}) {
-			debug("rendering $file");
-			render($file);
-			$rendered{$file}=1;
-		}
+	# render changed and new pages
+	foreach my $file (@changed) {
+		debug("rendering $file");
+		render($file);
+		$rendered{$file}=1;
 	}
 
 	# if any files were added or removed, check to see if each page
@@ -310,9 +328,8 @@ FILE: foreach my $file (@files) {
 		}
 	}
 
-	# Handle backlinks; if a page has added/removed links, update the
-	# pages it links to. Also handles rebuilding dependant pages.
 	if (%rendered || @del) {
+		# rebuild dependant pages
 		foreach my $f (@files) {
 			next if $rendered{$f};
 			my $p=pagename($f);
@@ -330,6 +347,8 @@ FILE: foreach my $file (@files) {
 		}
 	}
 
+	# handle backlinks; if a page has added/removed links,
+	# update the pages it links to
 	my %linkchanged;
 	foreach my $file (keys %rendered, @del) {
 		my $page=pagename($file);
@@ -364,7 +383,7 @@ FILE: foreach my $file (@files) {
 		}
 	}
 
-	# Remove no longer rendered files.
+	# remove no longer rendered files
 	foreach my $src (keys %rendered) {
 		my $page=pagename($src);
 		foreach my $file (@{$oldrenderedfiles{$page}}) {
diff --git a/debian/NEWS b/debian/NEWS
index f3556d622..781a32f59 100644
--- a/debian/NEWS
+++ b/debian/NEWS
@@ -1,3 +1,11 @@
+ikiwiki (1.32) unstable; urgency=low
+
+  There is a change to the plugin interface in this version. Any plugins that
+  modify data in %links should pass scan => 1 when registering the hook that
+  does so.
+
+ -- Joey Hess <joeyh@debian.org>  Sat, 28 Oct 2006 00:13:12 -0400
+
 ikiwiki (1.29) unstable; urgency=low
 
   Wikis need to be rebuilt on upgrade to this version. If you listed your wiki
diff --git a/debian/changelog b/debian/changelog
index e914d40b3..1f0394502 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,11 +1,18 @@
 ikiwiki (1.32) UNRELEASED; urgency=low
 
   * Add a separate pass to find page links, and only render each page once,
-    instead of over and over. This is up to 8 times faster than before!
-    (This could have introduced some subtle bugs, so it needs to be tested
-    extensively.)
+    instead of over and over. Typical speedup is ~4x. Max possible speedup:
+    8x.
+  * Add "scan" parameter to hook(), which is used to make the hook be called
+    during the scanning pass, as well as the render pass. The meta and tag
+    plugins need to use the new scan parameter, as will any others that modify
+    %links.
+  * Now that links are calculated in a separate pass, it can also
+    precalculate backlinks in one pass, which is O(N^2) instead of the
+    previous code that was O(N^3). A very nice speedup for wikis with lots
+    (thousands) of pages.
 
- -- Joey Hess <joeyh@debian.org>  Fri, 27 Oct 2006 23:21:35 -0400
+ -- Joey Hess <joeyh@debian.org>  Fri, 27 Oct 2006 23:27:29 -0400
 
 ikiwiki (1.31) unstable; urgency=low
 
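
The complexity claim in that changelog entry is easy to check in miniature.
The sketch below is standalone demonstration code, not ikiwiki source: it uses
a toy %links table and skips the bestlink() resolution the real code does, but
it performs the same inversion that calculate_backlinks() does in the
Render.pm hunk above.

    #!/usr/bin/perl
    # Toy demonstration of the backlinks speedup: invert %links once,
    # then answer every backlinks query with a hash lookup.
    use warnings;
    use strict;
    
    my %links=(
    	index => ["blog", "todo"],
    	blog  => ["index"],
    	todo  => ["index", "blog"],
    );
    
    # Old shape: answer one query by walking every page's link list.
    # Done once per page during rendering, this is O(N^2) overall.
    sub backlinks_slow {
    	my $page=shift;
    	return sort grep {
    		my $p=$_;
    		grep { $_ eq $page } @{$links{$p}};
    	} keys %links;
    }
    
    # New shape: one pass over all links builds the inverted map
    # (O(total links)); each later query is a lookup.
    my %backlinks;
    foreach my $page (keys %links) {
    	$backlinks{$_}{$page}=1 foreach @{$links{$page}};
    }
    
    print "slow: index <- ", join(", ", backlinks_slow("index")), "\n";
    print "fast: index <- ", join(", ", sort keys %{$backlinks{index}}), "\n";

Both queries print the same backlinkers; only the cost of getting them
differs.
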
diff --git a/doc/plugins/contrib/googlemaps.mdwn b/doc/plugins/contrib/googlemaps.mdwn
index 58ccf5adc..30f630a2c 100644
--- a/doc/plugins/contrib/googlemaps.mdwn
+++ b/doc/plugins/contrib/googlemaps.mdwn
@@ -1,5 +1,5 @@
 [[template id=plugin name=googlemaps author="Christian Mock"]]
-[[tag special-purpose]]
+[[tag type/special-purpose]]
 [[meta title="googlemaps (third-party plugin)"]]
 
 `googlemaps` is a plugin that allows using the [Google Maps API][2]
diff --git a/doc/plugins/write.mdwn b/doc/plugins/write.mdwn
index 8492b1756..5cace0911 100644
--- a/doc/plugins/write.mdwn
+++ b/doc/plugins/write.mdwn
@@ -30,6 +30,12 @@ hook, a "id" paramter, which should be a unique string for this plugin, and
 a "call" parameter, which is a reference to a function to call for the
 hook.
 
+An optional "scan" parameter, if set to a true value, makes the hook be
+called during the preliminary scan that ikiwiki makes of updated pages,
+before beginning to render pages. This parameter should be set to true if
+the hook modifies data in `%links`. Note that doing so will make the hook
+be run twice per page build, so avoid doing it for expensive hooks.
+
 ## Types of hooks
 
 In roughly the order they are called.
@@ -64,6 +70,14 @@ Runs on the raw source of a page, before anything else touches it, and
 can make arbitrary changes. The function is passed named parameters `page`
 and `content` and should return the filtered content.
 
+### scan
+
+	hook(type => "scan", id => "foo", call => \&scan);
+
+This is identical to a preprocess hook (see below), except that it is
+called in the initial pass that scans pages for data that will be used in
+later passes. Scan hooks are the only hook that should modify `%links`.
+
 ### preprocess
 
 Adding a [[PreProcessorDirective]] is probably the most common use of a
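
Since the scan hook type above is documented only by analogy to preprocess
hooks, the following registration is a hedged sketch: "foo" is the placeholder
id from the documentation, and the named page/content parameters are an
assumption about the calling convention, not a documented signature.

    # Sketch of a scan-type hook: runs in the initial pass, collects
    # data for later passes, and its return value is not rendered.
    hook(type => "scan", id => "foo", call => sub {
    	my %params=@_;
    
    	# record a link found during the scan pass; per the text above,
    	# %links is the only global a scan hook should modify
    	push @{$links{$params{page}}}, "sandbox"
    		if defined $params{content} && $params{content}=~/\bsandbox\b/;
    });
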
diff --git a/doc/roadmap.mdwn b/doc/roadmap.mdwn
index b393f254f..701365a25 100644
--- a/doc/roadmap.mdwn
+++ b/doc/roadmap.mdwn
@@ -18,7 +18,7 @@ Released 29 April 2006.
 * [[Tags]] _(status: fair)_
 * Should have fully working [[todo/utf8]] support. _(status: good)_
 * [[Optimised_rendering|todo/optimisations]] if possible. Deal with other
-  scalability issues. _(status: something like 9x speedup 1.0!)_
+  scalability issues. _(status: should be faster, need to get numbers)_
 * Improved [[todo/html]] stylesheets and templates.
 * Improved scalable [[logo]]. _(status: done)_
 * Support for at other revision control systems aside from svn.
diff --git a/doc/todo/optimisations.mdwn b/doc/todo/optimisations.mdwn
index 13a270b8f..0eb830cd0 100644
--- a/doc/todo/optimisations.mdwn
+++ b/doc/todo/optimisations.mdwn
@@ -4,18 +4,6 @@
 * Look at splitting up CGI.pm. But note that too much splitting can slow
   perl down.
 
-* The backlinks code turns out to scale badly to wikis with thousands of
-  pages. The code is O(N^2)! It's called for each page, and it loops
-  through all the pages to find backlinks.
-
-  Need to find a way to calculate and cache all the backlinks in one pass,
-  which could be done in at worst O(N), and possibly less (if they're
-  stored in the index, it could be constant time). But to do this, there
-  would need to be a way to invalidate or update the cache in these
-  situations:
-
-  - A page is added. Note that this can change a backlink to point to
-    the new page instead of the page it pointed to before.
-  - A page is deleted. This can also change backlinks that pointed to that
-    page.
-  - A page is modified. Links added/removed.
+* The backlinks calculation code is still O(N^2) on the number of pages.
+  If backlinks info were stored in the index file, it would go down to
+  constant time for iterative builds, though still N^2 for rebuilds.
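
One possible shape for the index-file caching suggested in the rewritten todo
item, sketched with Storable purely for illustration; ikiwiki's real index is
a hand-rolled text format, and the file name and both subs here are invented:

    # Hypothetical persistence of the inverted %backlinks map between
    # runs, so an iterative build can load it instead of redoing the
    # O(N^2) pass over all pages.
    use Storable qw(nstore retrieve);
    
    my $backlinksfile=".ikiwiki/backlinks.store";
    
    sub save_backlinks ($) {
    	my $backlinks=shift;
    	nstore($backlinks, $backlinksfile);
    }
    
    sub load_backlinks () {
    	return -e $backlinksfile ? retrieve($backlinksfile) : undef;
    }

Loading the map would still require invalidating entries for pages that were
added, deleted, or whose links changed, which is exactly the bookkeeping the
removed text above was cataloguing.
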