optimization: pagespec_match_list with no num limit matches before sorting

This can be a lot faster, since huge numbers of pages are not sorted
only to mostly be thrown away. It sped up a build of my blog by at least
5 minutes.
master
Joey Hess 2010-04-11 01:30:03 -04:00
parent cecbd52938
commit 0bfc364a7d
3 changed files with 34 additions and 13 deletions

View File

@ -1951,8 +1951,9 @@ sub add_link ($$;$) {
}
}
sub sortspec_translate ($) {
sub sortspec_translate ($$) {
my $spec = shift;
my $reverse = shift;
my $code = "";
my @data;
@ -2007,6 +2008,10 @@ sub sortspec_translate ($) {
return sub { 0 };
}
if ($reverse) {
$code="-($code)";
}
no warnings;
return eval 'sub { '.$code.' }';
}
@ -2109,18 +2114,20 @@ sub pagespec_match_list ($$;@) {
? grep { ! $params{filter}->($_) } keys %pagesources
: keys %pagesources;
}
if (defined $params{sort}) {
@candidates = IkiWiki::SortSpec::sort_pages($params{sort},
@candidates);
my $num=$params{num};
my $sort=$params{sort};
my $reverse=$params{reverse};
# when only the top matches will be returned, it's efficient to
# sort before matching to pagespec,
if (defined $num && defined $sort) {
@candidates=IkiWiki::SortSpec::sort_pages(
$sort, $reverse, @candidates);
}
@candidates=reverse(@candidates) if $params{reverse};
$depends{$page}{$pagespec} |= ($params{deptype} || $DEPEND_CONTENT);
# clear params, remainder is passed to pagespec
my $num=$params{num};
delete @params{qw{num deptype reverse sort filter list}};
my @matches;
@ -2144,7 +2151,15 @@ sub pagespec_match_list ($$;@) {
$depends_simple{$page}{lc $k} |= $i->{$k};
}
return @matches;
# when all matches will be returned, it's efficient to
# sort after matching
if (! defined $num && defined $sort) {
return IkiWiki::SortSpec::sort_pages(
$sort, $reverse, @matches);
}
else {
return @matches;
}
}
sub pagespec_valid ($) {
@ -2437,9 +2452,8 @@ package IkiWiki::SortSpec;
# This is in the SortSpec namespace so that the $a and $b that sort() uses
# are easily available in this namespace, for cmp functions to use them.
sub sort_pages {
my $f = IkiWiki::sortspec_translate(shift);
return sort $f @_;
my $f=IkiWiki::sortspec_translate(shift, shift);
sort $f @_
}
sub cmp_title {

View File

@ -29,5 +29,8 @@ date range.) Well, in this case, it *does* make sense to flip it, limit by
pagespe first, and do a (quick)sort second. (No influence complications,
either.)
> Flipping when there's no limit implemented, and it knocked 1/3 off
> the rebuild time of my blog's archive pages. --[[Joey]]
Adding these special cases will be more complicated, but I think the best
of both worlds. --[[Joey]]

View File

@ -1,7 +1,7 @@
#!/usr/bin/perl
use warnings;
use strict;
use Test::More tests => 90;
use Test::More tests => 92;
BEGIN { use_ok("IkiWiki"); }
@ -38,12 +38,16 @@ $links{foo3}=[qw{bar}];
is_deeply([pagespec_match_list("foo", "bar")], ["bar"]);
is_deeply([sort(pagespec_match_list("foo", "* and !post/*"))], ["bar", "foo", "foo2", "foo3"]);
is_deeply([sort(pagespec_match_list("foo", "post/*"))], ["post/1", "post/2", "post/3"]);
is_deeply([pagespec_match_list("foo", "post/*", sort => "title")],
["post/1", "post/2", "post/3"]);
is_deeply([pagespec_match_list("foo", "post/*", sort => "title", reverse => 1)],
["post/3", "post/2", "post/1"]);
is_deeply([pagespec_match_list("foo", "post/*", sort => "title", num => 2)],
["post/1", "post/2"]);
is_deeply([pagespec_match_list("foo", "post/*", sort => "title", num => 50)],
["post/1", "post/2", "post/3"]);
is_deeply([pagespec_match_list("foo", "post/*", sort => "title", num => 50, reverse => 1)],
["post/3", "post/2", "post/1"]);
is_deeply([pagespec_match_list("foo", "post/*", sort => "title",
filter => sub { $_[0] =~ /3/}) ],
["post/1", "post/2"]);