Add a separate pass to find page links, and only render each page once,
instead of over and over. Typical speedup is ~4x. Max possible speedup: 8x.

* Add "scan" parameter to hook(), which is used to make the hook be called
  during the scanning pass as well as the render pass. The meta and tag
  plugins need to use the new scan parameter, as will any others that
  modify %links.
* Now that links are calculated in a separate pass, it can also precalculate
  backlinks in one pass, which is O(N^2) instead of the previous code that
  was O(N^3). A very nice speedup for wikis with lots (thousands) of pages.
parent 49bf877701
commit db3b72c482
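The plugin-facing change is the one visible in the meta and tag hunks below: any hook that modifies %links must now be registered with scan => 1 so that it also runs during the new scanning pass. Here is a minimal sketch of an affected third-party plugin, assuming the exported hook() and %links interface documented in the doc/plugins/write.mdwn hunk below; the "foo" plugin and the link it pushes are hypothetical:

	#!/usr/bin/perl
	package IkiWiki::Plugin::foo;

	use warnings;
	use strict;
	use IkiWiki;

	sub import { #{{{
		# scan => 1 makes this preprocess hook also run during the
		# scanning pass, so the link it records is in %links before
		# any page is rendered.
		hook(type => "preprocess", id => "foo", call => \&preprocess,
			scan => 1);
	} #}}}

	sub preprocess (@) { #{{{
		my %params=@_;
		# Record a link from the page containing the directive;
		# this is the kind of %links change that needs scan => 1.
		push @{$links{$params{page}}}, "sandbox";
		return "";
	} #}}}

	1

Without scan => 1 the hook would only run at render time, after backlinks have already been precalculated from %links, so the link would be missed.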
@@ -446,10 +446,11 @@ sub linkify ($$$) { #{{{
 } #}}}
 
 my %preprocessing;
-sub preprocess ($$$) { #{{{
+sub preprocess ($$$;$) { #{{{
 	my $page=shift; # the page the data comes from
 	my $destpage=shift; # the page the data will appear in (different for inline)
 	my $content=shift;
+	my $scan=shift;
 
 	my $handle=sub {
 		my $escape=shift;
@@ -459,6 +460,7 @@ sub preprocess ($$$) { #{{{
 			return "[[$command $params]]";
 		}
 		elsif (exists $hooks{preprocess}{$command}) {
+			return "" if $scan && ! $hooks{preprocess}{$command}{scan};
 			# Note: preserve order of params, some plugins may
 			# consider it significant.
 			my @params;
@@ -13,7 +13,7 @@ my %author;
 my %authorurl;
 
 sub import { #{{{
-	hook(type => "preprocess", id => "meta", call => \&preprocess);
+	hook(type => "preprocess", id => "meta", call => \&preprocess, scan => 1);
 	hook(type => "filter", id => "meta", call => \&filter);
 	hook(type => "pagetemplate", id => "meta", call => \&pagetemplate);
 } # }}}
@@ -10,7 +10,7 @@ my %tags;
 
 sub import { #{{{
 	hook(type => "getopt", id => "tag", call => \&getopt);
-	hook(type => "preprocess", id => "tag", call => \&preprocess);
+	hook(type => "preprocess", id => "tag", call => \&preprocess, scan => 1);
 	hook(type => "pagetemplate", id => "tag", call => \&pagetemplate);
 } # }}}
 
@@ -7,14 +7,30 @@ use strict;
 use IkiWiki;
 use Encode;
 
+my %backlinks;
+my $backlinks_calculated=0;
+
+sub calculate_backlinks () { #{{{
+	%backlinks=();
+	foreach my $page (keys %links) {
+		foreach my $link (@{$links{$page}}) {
+			my $bestlink=bestlink($page, $link);
+			if (length $bestlink && $bestlink ne $page) {
+				$backlinks{$bestlink}{$page}=1;
+			}
+		}
+	}
+	$backlinks_calculated=1;
+} #}}}
+
 sub backlinks ($) { #{{{
 	my $page=shift;
 
-	my @links;
-	foreach my $p (keys %links) {
-		next if bestlink($page, $p) eq $page;
-
-		if (grep { length $_ && bestlink($p, $_) eq $page } @{$links{$p}}) {
+	calculate_backlinks() unless $backlinks_calculated;
+
+	my @links;
+	return unless $backlinks{$page};
+	foreach my $p (keys %{$backlinks{$page}}) {
 		my $href=abs2rel(htmlpage($p), dirname($page));
 
 		# Trim common dir prefixes from both pages.
@@ -28,7 +44,6 @@ sub backlinks ($) { #{{{
 
 		push @links, { url => $href, page => pagetitle($p_trimmed) };
 	}
-	}
 
 	return sort { $a->{page} cmp $b->{page} } @links;
 } #}}}
@@ -128,6 +143,11 @@ sub scan ($) { #{{{
 		my $srcfile=srcfile($file);
 		my $content=readfile($srcfile);
 		my $page=pagename($file);
+		will_render($page, htmlpage($page), 1);
+
+		# Always needs to be done, since filters might add links
+		# to the content.
+		$content=filter($page, $content);
 
 		my @links;
 		while ($content =~ /(?<!\\)$config{wiki_link_regexp}/g) {
@@ -139,6 +159,9 @@ sub scan ($) { #{{{
 			push @links, "$page/discussion";
 		}
 		$links{$page}=\@links;
+
+		# Preprocess in scan-only mode.
+		preprocess($page, $page, $content, 1);
 	}
 } #}}}
 
@@ -240,7 +263,6 @@ sub refresh () { #{{{
 		my $page=pagename($file);
 		if (! $oldpagemtime{$page}) {
 			push @add, $file;
-			scan($file);
 			$pagecase{lc $page}=$page;
 			$pagesources{$page}=$file;
 			if ($config{getctime} && -e "$config{srcdir}/$file") {
@@ -265,7 +287,8 @@ sub refresh () { #{{{
 		}
 	}
 
-	# scan updated files to update info about them
+	# scan changed and new files
+	my @changed;
 	foreach my $file (@files) {
 		my $page=pagename($file);
 
@@ -273,22 +296,17 @@ sub refresh () { #{{{
 		    mtime(srcfile($file)) > $oldpagemtime{$page} ||
 		    $forcerebuild{$page}) {
 			debug("scanning $file");
+			push @changed, $file;
 			scan($file);
 		}
 	}
 
-	# render any updated files
-	foreach my $file (@files) {
-		my $page=pagename($file);
-
-		if (! exists $oldpagemtime{$page} ||
-		    mtime(srcfile($file)) > $oldpagemtime{$page} ||
-		    $forcerebuild{$page}) {
+	# render changed and new pages
+	foreach my $file (@changed) {
 		debug("rendering $file");
 		render($file);
 		$rendered{$file}=1;
 	}
-	}
 
 	# if any files were added or removed, check to see if each page
 	# needs an update due to linking to them or inlining them
@@ -310,9 +328,8 @@ FILE: foreach my $file (@files) {
 		}
 	}
 
-	# Handle backlinks; if a page has added/removed links, update the
-	# pages it links to. Also handles rebuilding dependant pages.
 	if (%rendered || @del) {
+		# rebuild dependant pages
 		foreach my $f (@files) {
 			next if $rendered{$f};
 			my $p=pagename($f);
@@ -330,6 +347,8 @@ FILE: foreach my $file (@files) {
 			}
 		}
 
+		# handle backlinks; if a page has added/removed links,
+		# update the pages it links to
 		my %linkchanged;
 		foreach my $file (keys %rendered, @del) {
 			my $page=pagename($file);
@@ -364,7 +383,7 @@ FILE: foreach my $file (@files) {
 		}
 	}
 
-	# Remove no longer rendered files.
+	# remove no longer rendered files
 	foreach my $src (keys %rendered) {
 		my $page=pagename($src);
 		foreach my $file (@{$oldrenderedfiles{$page}}) {
@@ -1,3 +1,11 @@
+ikiwiki (1.32) unstable; urgency=low
+
+  There is a change to the plugin interface in this version. Any plugins that
+  modify data in %links should pass scan => 1 when registering the hook that
+  does so.
+
+ -- Joey Hess <joeyh@debian.org>  Sat, 28 Oct 2006 00:13:12 -0400
+
 ikiwiki (1.29) unstable; urgency=low
 
   Wikis need to be rebuilt on upgrade to this version. If you listed your wiki
@@ -1,11 +1,18 @@
 ikiwiki (1.32) UNRELEASED; urgency=low
 
   * Add a separate pass to find page links, and only render each page once,
-    instead of over and over. This is up to 8 times faster than before!
-    (This could have introduced some subtle bugs, so it needs to be tested
-    extensively.)
+    instead of over and over. Typical speedup is ~4x. Max possible speedup:
+    8x.
+  * Add "scan" parameter to hook(), which is used to make the hook be called
+    during the scanning pass, as well as the render pass. The meta and tag
+    plugins need to use the new scan parameter, so will any others that modify
+    %links.
+  * Now that links are calculated in a separate pass, it can also
+    precalculate backlinks in one pass, which is O(N^2) instead of the
+    previous code that was O(N^3). A very nice speedup for wikis with lots
+    (thousands) of pages.
 
- -- Joey Hess <joeyh@debian.org>  Fri, 27 Oct 2006 23:21:35 -0400
+ -- Joey Hess <joeyh@debian.org>  Fri, 27 Oct 2006 23:27:29 -0400
 
 ikiwiki (1.31) unstable; urgency=low
 
@@ -1,5 +1,5 @@
 [[template id=plugin name=googlemaps author="Christian Mock"]]
-[[tag special-purpose]]
+[[tag type/special-purpose]]
 [[meta title="googlemaps (third-party plugin)"]]
 
 `googlemaps` is a plugin that allows using the [Google Maps API][2]
@@ -30,6 +30,12 @@ hook, a "id" paramter, which should be a unique string for this plugin, and
 a "call" parameter, which is a reference to a function to call for the
 hook.
 
+An optional "scan" parameter, if set to a true value, makes the hook be
+called during the preliminary scan that ikiwiki makes of updated pages,
+before begining to render pages. This parameter should be set to true if
+the hook modifies data in `%links`. Note that doing so will make the hook
+be run twice per page build, so avoid doing it for expensive hooks.
+
 ## Types of hooks
 
 In roughly the order they are called.
@@ -64,6 +70,14 @@ Runs on the raw source of a page, before anything else touches it, and can
 make arbitrary changes. The function is passed named parameters `page` and
 `content` and should return the filtered content.
 
+### scan
+
+	hook(type => "scan", id => "foo", call => \&scan);
+
+This is identical to a preprocess hook (see below), except that it is
+called in the initial pass that scans pages for data that will be used in
+later passes. Scan hooks are the only hook that should modify `%links`.
+
 ### preprocess
 
 Adding a [[PreProcessorDirective]] is probably the most common use of a
@@ -18,7 +18,7 @@ Released 29 April 2006.
 * [[Tags]] _(status: fair)_
 * Should have fully working [[todo/utf8]] support. _(status: good)_
 * [[Optimised_rendering|todo/optimisations]] if possible. Deal with other
-  scalability issues. _(status: something like 9x speedup 1.0!)_
+  scalability issues. _(status: should be faster, need to get numbers)_
 * Improved [[todo/html]] stylesheets and templates.
 * Improved scalable [[logo]]. _(status: done)_
 * Support for at other revision control systems aside from svn.
@@ -4,18 +4,6 @@
 * Look at splitting up CGI.pm. But note that too much splitting can slow
   perl down.
 
-* The backlinks code turns out to scale badly to wikis with thousands of
-  pages. The code is O(N^2)! It's called for each page, and it loops
-  through all the pages to find backlinks.
-
-  Need to find a way to calculate and cache all the backlinks in one pass,
-  which could be done in at worst O(N), and possibly less (if they're
-  stored in the index, it could be constant time). But to do this, there
-  would need to be a way to invalidate or update the cache in these
-  situations:
-
-  - A page is added. Note that this can change a backlink to point to
-    the new page instead of the page it pointed to before.
-  - A page is deleted. This can also change backlinks that pointed to that
-    page.
-  - A page is modified. Links added/removed.
+* The backlinks calculation code is still O(N^2) on the number of pages.
+  If backlinks info were stored in the index file, it would go down to
+  constant time for iterative builds, though still N^2 for rebuilds.