* Add a separate pass to find page links, and only render each page once,
instead of over and over. This is up to 8 times faster than before! (This could have introduced some subtle bugs, so it needs to be tested extensively.)
master
parent
05fe79b487
commit
49bf877701
|
@ -13,6 +13,7 @@ sub backlinks ($) { #{{{
|
||||||
my @links;
|
my @links;
|
||||||
foreach my $p (keys %links) {
|
foreach my $p (keys %links) {
|
||||||
next if bestlink($page, $p) eq $page;
|
next if bestlink($page, $p) eq $page;
|
||||||
|
|
||||||
if (grep { length $_ && bestlink($p, $_) eq $page } @{$links{$p}}) {
|
if (grep { length $_ && bestlink($p, $_) eq $page } @{$links{$p}}) {
|
||||||
my $href=abs2rel(htmlpage($p), dirname($page));
|
my $href=abs2rel(htmlpage($p), dirname($page));
|
||||||
|
|
||||||
|
@ -119,21 +120,25 @@ sub mtime ($) { #{{{
|
||||||
return (stat($file))[9];
|
return (stat($file))[9];
|
||||||
} #}}}
|
} #}}}
|
||||||
|
|
||||||
sub findlinks ($$) { #{{{
|
sub scan ($) { #{{{
|
||||||
my $page=shift;
|
my $file=shift;
|
||||||
my $content=shift;
|
|
||||||
|
|
||||||
my @links;
|
my $type=pagetype($file);
|
||||||
while ($content =~ /(?<!\\)$config{wiki_link_regexp}/g) {
|
if (defined $type) {
|
||||||
push @links, titlepage($2);
|
my $srcfile=srcfile($file);
|
||||||
}
|
my $content=readfile($srcfile);
|
||||||
if ($config{discussion}) {
|
my $page=pagename($file);
|
||||||
# Discussion links are a special case since they're not in the
|
|
||||||
# text of the page, but on its template.
|
my @links;
|
||||||
return @links, "$page/discussion";
|
while ($content =~ /(?<!\\)$config{wiki_link_regexp}/g) {
|
||||||
}
|
push @links, titlepage($2);
|
||||||
else {
|
}
|
||||||
return @links;
|
if ($config{discussion}) {
|
||||||
|
# Discussion links are a special case since they're not in the
|
||||||
|
# text of the page, but on its template.
|
||||||
|
push @links, "$page/discussion";
|
||||||
|
}
|
||||||
|
$links{$page}=\@links;
|
||||||
}
|
}
|
||||||
} #}}}
|
} #}}}
|
||||||
|
|
||||||
|
@ -149,9 +154,6 @@ sub render ($) { #{{{
|
||||||
will_render($page, htmlpage($page), 1);
|
will_render($page, htmlpage($page), 1);
|
||||||
|
|
||||||
$content=filter($page, $content);
|
$content=filter($page, $content);
|
||||||
|
|
||||||
$links{$page}=[findlinks($page, $content)];
|
|
||||||
|
|
||||||
$content=preprocess($page, $page, $content);
|
$content=preprocess($page, $page, $content);
|
||||||
$content=linkify($page, $page, $content);
|
$content=linkify($page, $page, $content);
|
||||||
$content=htmlize($page, $type, $content);
|
$content=htmlize($page, $type, $content);
|
||||||
|
@ -162,7 +164,6 @@ sub render ($) { #{{{
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
my $content=readfile($srcfile, 1);
|
my $content=readfile($srcfile, 1);
|
||||||
$links{$file}=[];
|
|
||||||
delete $depends{$file};
|
delete $depends{$file};
|
||||||
will_render($file, $file, 1);
|
will_render($file, $file, 1);
|
||||||
writefile($file, $config{destdir}, $content, 1);
|
writefile($file, $config{destdir}, $content, 1);
|
||||||
|
@ -238,9 +239,8 @@ sub refresh () { #{{{
|
||||||
foreach my $file (@files) {
|
foreach my $file (@files) {
|
||||||
my $page=pagename($file);
|
my $page=pagename($file);
|
||||||
if (! $oldpagemtime{$page}) {
|
if (! $oldpagemtime{$page}) {
|
||||||
debug("new page $page") unless exists $pagectime{$page};
|
|
||||||
push @add, $file;
|
push @add, $file;
|
||||||
$links{$page}=[];
|
scan($file);
|
||||||
$pagecase{lc $page}=$page;
|
$pagecase{lc $page}=$page;
|
||||||
$pagesources{$page}=$file;
|
$pagesources{$page}=$file;
|
||||||
if ($config{getctime} && -e "$config{srcdir}/$file") {
|
if ($config{getctime} && -e "$config{srcdir}/$file") {
|
||||||
|
@ -256,6 +256,7 @@ sub refresh () { #{{{
|
||||||
if (! $exists{$page}) {
|
if (! $exists{$page}) {
|
||||||
debug("removing old page $page");
|
debug("removing old page $page");
|
||||||
push @del, $pagesources{$page};
|
push @del, $pagesources{$page};
|
||||||
|
$links{$page}=[];
|
||||||
$renderedfiles{$page}=[];
|
$renderedfiles{$page}=[];
|
||||||
$oldpagemtime{$page}=0;
|
$oldpagemtime{$page}=0;
|
||||||
prune($config{destdir}."/".$_)
|
prune($config{destdir}."/".$_)
|
||||||
|
@ -264,6 +265,18 @@ sub refresh () { #{{{
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# scan updated files to update info about them
|
||||||
|
foreach my $file (@files) {
|
||||||
|
my $page=pagename($file);
|
||||||
|
|
||||||
|
if (! exists $oldpagemtime{$page} ||
|
||||||
|
mtime(srcfile($file)) > $oldpagemtime{$page} ||
|
||||||
|
$forcerebuild{$page}) {
|
||||||
|
debug("scanning $file");
|
||||||
|
scan($file);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
# render any updated files
|
# render any updated files
|
||||||
foreach my $file (@files) {
|
foreach my $file (@files) {
|
||||||
my $page=pagename($file);
|
my $page=pagename($file);
|
||||||
|
@ -278,12 +291,10 @@ sub refresh () { #{{{
|
||||||
}
|
}
|
||||||
|
|
||||||
# if any files were added or removed, check to see if each page
|
# if any files were added or removed, check to see if each page
|
||||||
# needs an update due to linking to them or inlining them.
|
# needs an update due to linking to them or inlining them
|
||||||
# TODO: inefficient; pages may get rendered above and again here;
|
|
||||||
# problem is the bestlink may have changed and we won't know until
|
|
||||||
# now
|
|
||||||
if (@add || @del) {
|
if (@add || @del) {
|
||||||
FILE: foreach my $file (@files) {
|
FILE: foreach my $file (@files) {
|
||||||
|
next if $rendered{$file};
|
||||||
my $page=pagename($file);
|
my $page=pagename($file);
|
||||||
foreach my $f (@add, @del) {
|
foreach my $f (@add, @del) {
|
||||||
my $p=pagename($f);
|
my $p=pagename($f);
|
||||||
|
@ -301,11 +312,9 @@ FILE: foreach my $file (@files) {
|
||||||
|
|
||||||
# Handle backlinks; if a page has added/removed links, update the
|
# Handle backlinks; if a page has added/removed links, update the
|
||||||
# pages it links to. Also handles rebuilding dependant pages.
|
# pages it links to. Also handles rebuilding dependant pages.
|
||||||
# TODO: inefficient; pages may get rendered above and again here;
|
|
||||||
# problem is the backlinks could be wrong in the first pass render
|
|
||||||
# above
|
|
||||||
if (%rendered || @del) {
|
if (%rendered || @del) {
|
||||||
foreach my $f (@files) {
|
foreach my $f (@files) {
|
||||||
|
next if $rendered{$f};
|
||||||
my $p=pagename($f);
|
my $p=pagename($f);
|
||||||
if (exists $depends{$p}) {
|
if (exists $depends{$p}) {
|
||||||
foreach my $file (keys %rendered, @del) {
|
foreach my $file (keys %rendered, @del) {
|
||||||
|
@ -347,6 +356,7 @@ FILE: foreach my $file (@files) {
|
||||||
foreach my $link (keys %linkchanged) {
|
foreach my $link (keys %linkchanged) {
|
||||||
my $linkfile=$pagesources{$link};
|
my $linkfile=$pagesources{$link};
|
||||||
if (defined $linkfile) {
|
if (defined $linkfile) {
|
||||||
|
next if $rendered{$linkfile};
|
||||||
debug("rendering $linkfile, to update its backlinks");
|
debug("rendering $linkfile, to update its backlinks");
|
||||||
render($linkfile);
|
render($linkfile);
|
||||||
$rendered{$linkfile}=1;
|
$rendered{$linkfile}=1;
|
||||||
|
|
|
@ -1,3 +1,12 @@
|
||||||
|
ikiwiki (1.32) UNRELEASED; urgency=low
|
||||||
|
|
||||||
|
* Add a separate pass to find page links, and only render each page once,
|
||||||
|
instead of over and over. This is up to 8 times faster than before!
|
||||||
|
(This could have introduced some subtle bugs, so it needs to be tested
|
||||||
|
extensively.)
|
||||||
|
|
||||||
|
-- Joey Hess <joeyh@debian.org> Fri, 27 Oct 2006 23:21:35 -0400
|
||||||
|
|
||||||
ikiwiki (1.31) unstable; urgency=low
|
ikiwiki (1.31) unstable; urgency=low
|
||||||
|
|
||||||
* Patch from Pawel Tecza to cp -a the templates in the Makefile.
|
* Patch from Pawel Tecza to cp -a the templates in the Makefile.
|
||||||
|
|
|
@ -14,12 +14,11 @@ Released 29 April 2006.
|
||||||
|
|
||||||
* Unit test suite (with tests of at least core stuff like
|
* Unit test suite (with tests of at least core stuff like
|
||||||
[[PageSpec]]). _(status: exists, could of course use more tests)_
|
[[PageSpec]]). _(status: exists, could of course use more tests)_
|
||||||
* [[Plugins]] _(status: done, interface still not [[quite_stable|todo/firm_up_plugin_interface]])_
|
* [[Plugins]] _(status: done)_
|
||||||
* [[Tags]] _(status: fair)_
|
* [[Tags]] _(status: fair)_
|
||||||
* Should have fully working [[todo/utf8]] support. _(status: good)_
|
* Should have fully working [[todo/utf8]] support. _(status: good)_
|
||||||
* [[Optimised_rendering|todo/optimisations]] if possible. Deal with other
|
* [[Optimised_rendering|todo/optimisations]] if possible. Deal with other
|
||||||
scalability issues. _(status: 45%-60%+ speedup since 1.0, much more
|
scalability issues. _(status: something like 9x speedup since 1.0!)_
|
||||||
possible)_
|
|
||||||
* Improved [[todo/html]] stylesheets and templates.
|
* Improved [[todo/html]] stylesheets and templates.
|
||||||
* Improved scalable [[logo]]. _(status: done)_
|
* Improved scalable [[logo]]. _(status: done)_
|
||||||
* Support for other revision control systems aside from svn.
|
* Support for other revision control systems aside from svn.
|
||||||
|
|
|
@ -1,25 +1,21 @@
|
||||||
* Render each changed page only once. Currently pages are rendered up to 4
|
|
||||||
times in worst case (8 times if there's an rss feed).
|
|
||||||
|
|
||||||
The issue is that rendering a page is used to gather info like the links
|
|
||||||
on the page (and other stuff) that can affect rendering other pages. So it
|
|
||||||
needs a multi-pass system. But rendering the whole page in each pass is
|
|
||||||
rather obscene.
|
|
||||||
|
|
||||||
It would be better to have the first pass be a data gathering pass. Such
|
|
||||||
a pass would still need to load and parse the page contents etc, but
|
|
||||||
wouldn't need to generate html or write anything to disk.
|
|
||||||
|
|
||||||
One problem with this idea is that it could turn into 2x the work in
|
|
||||||
cases where ikiwiki currently efficiently renders a page just once. And
|
|
||||||
caching between the passes to avoid that wouldn't do good things to the
|
|
||||||
memory footprint.
|
|
||||||
|
|
||||||
Might be best to just do a partial first pass, getting eg, the page links
|
|
||||||
up-to-date, and then multiple, but generally fewer, rendering passes.
|
|
||||||
|
|
||||||
* Don't render blog archive pages unless a page is added/removed. Just
|
* Don't render blog archive pages unless a page is added/removed. Just
|
||||||
changing a page doesn't affect the archives as they show only the title.
|
changing a page doesn't affect the archives as they show only the title.
|
||||||
|
|
||||||
* Look at splitting up CGI.pm. But note that too much splitting can slow
|
* Look at splitting up CGI.pm. But note that too much splitting can slow
|
||||||
perl down.
|
perl down.
|
||||||
|
|
||||||
|
* The backlinks code turns out to scale badly to wikis with thousands of
|
||||||
|
pages. The code is O(N^2)! It's called for each page, and it loops
|
||||||
|
through all the pages to find backlinks.
|
||||||
|
|
||||||
|
Need to find a way to calculate and cache all the backlinks in one pass,
|
||||||
|
which could be done in at worst O(N), and possibly less (if they're
|
||||||
|
stored in the index, it could be constant time). But to do this, there
|
||||||
|
would need to be a way to invalidate or update the cache in these
|
||||||
|
situations:
|
||||||
|
|
||||||
|
- A page is added. Note that this can change a backlink to point to
|
||||||
|
the new page instead of the page it pointed to before.
|
||||||
|
- A page is deleted. This can also change backlinks that pointed to that
|
||||||
|
page.
|
||||||
|
- A page is modified. Links added/removed.
|
||||||
|
|
Loading…
Reference in New Issue