Added only_committed_changes config setting, which speeds up wiki refresh by querying git to find the files that were changed, rather than looking at the work tree. Not enabled by default as it can break some setups where not all files get committed to git.

master
Joey Hess 2013-11-16 17:26:20 -04:00
parent 3aaa33064c
commit 654530fa8b
7 changed files with 171 additions and 20 deletions

View File

@ -14,7 +14,7 @@ use vars qw{%config %links %oldlinks %pagemtime %pagectime %pagecase
%pagestate %wikistate %renderedfiles %oldrenderedfiles
%pagesources %delpagesources %destsources %depends %depends_simple
@mass_depends %hooks %forcerebuild %loaded_plugins %typedlinks
%oldtypedlinks %autofiles};
%oldtypedlinks %autofiles @underlayfiles $lastrev};
use Exporter q{import};
our @EXPORT = qw(hook debug error htmlpage template template_depends
@ -134,6 +134,13 @@ sub getsetup () {
safe => 1,
rebuild => 0,
},
only_committed_changes => {
type => "boolean",
default => 0,
description => "enable optimization of only refreshing committed changes?",
safe => 1,
rebuild => 0,
},
rcs => {
type => "string",
default => '',
@ -1881,6 +1888,8 @@ sub loadindex () {
foreach my $page (keys %renderedfiles) {
$destsources{$_}=$page foreach @{$renderedfiles{$page}};
}
$lastrev=$index->{lastrev};
@underlayfiles=@{$index->{underlayfiles}} if ref $index->{underlayfiles};
return close($in);
}
@ -1934,6 +1943,9 @@ sub saveindex () {
}
}
$index{lastrev}=$lastrev;
$index{underlayfiles}=\@underlayfiles;
$index{version}="3";
my $ret=Storable::nstore_fd(\%index, $out);
return if ! defined $ret || ! $ret;

View File

@ -29,6 +29,8 @@ sub import {
hook(type => "rcs", id => "rcs_receive", call => \&rcs_receive);
hook(type => "rcs", id => "rcs_preprevert", call => \&rcs_preprevert);
hook(type => "rcs", id => "rcs_revert", call => \&rcs_revert);
hook(type => "rcs", id => "rcs_find_changes", call => \&rcs_find_changes);
hook(type => "rcs", id => "rcs_get_current_rev", call => \&rcs_get_current_rev);
}
sub checkconfig () {
@ -462,19 +464,56 @@ sub git_commit_info ($;$) {
return wantarray ? @ci : $ci[0];
}
sub git_sha1 (;$) {
# Return head sha1sum (of given file).
my $file = shift || q{--};
# Finds the files changed and deleted by commits made after $oldrev.
#
# Reads `git log` for the range $oldrev..HEAD, limited to the current
# directory, oldest commit first, so the last thing that happened to a
# file decides whether it ends up as changed or deleted.
#
# Returns a three element list:
#   - a reference to a hash whose keys are the files added or modified,
#   - a reference to a hash whose keys are the files deleted,
#   - the sha1 of the newest commit seen, or $oldrev when the log for the
#     range was empty.
sub rcs_find_changes ($) {
	my $oldrev=shift;

	my @raw_lines = run_or_die('git', 'log',
		'--pretty=raw', '--raw', '--abbrev=40', '--always', '-c',
		'--no-renames', '--reverse',
		'-r', "$oldrev..HEAD", '--', '.');

	# Due to --reverse, we see changes in chronological order.
	my %changed;
	my %deleted;
	# A destination sha1 of forty zeros is treated as "file was deleted".
	my $nullsha = 0 x 40;
	# Start from the old rev so that an empty log does not yield undef.
	my $newrev=$oldrev;
	while (my $ci = parse_diff_tree(\@raw_lines)) {
		$newrev=$ci->{sha1};
		foreach my $i (@{$ci->{details}}) {
			my $file=$i->{file};
			# sha1s are hex strings, so they have to be compared with
			# eq. A numeric == only looks at the leading digits and
			# would report most modified files as deleted.
			if ($i->{sha1_to} eq $nullsha) {
				delete $changed{$file};
				$deleted{$file}=1;
			}
			else {
				delete $deleted{$file};
				$changed{$file}=1;
			}
		}
	}

	return (\%changed, \%deleted, $newrev);
}
# Returns the result of git_sha1 when restricted to a single file.
# The file is handed over after a '--' separator.
sub git_sha1_file ($) {
	my ($path)=@_;
	return git_sha1("--", $path);
}
sub git_sha1 (@) {
# Ignore error since a non-existing file might be given.
my ($sha1) = run_or_non('git', 'rev-list', '--max-count=1', 'HEAD',
'--', $file);
'--', @_);
if (defined $sha1) {
($sha1) = $sha1 =~ m/($sha1_pattern)/; # sha1 is untainted now
}
return defined $sha1 ? $sha1 : '';
}
# rcs hook: returns the current rev, which is whatever git_sha1 yields
# when called without any arguments.
sub rcs_get_current_rev () {
	return git_sha1();
}
sub rcs_update () {
# Update working directory.
@ -488,7 +527,7 @@ sub rcs_prepedit ($) {
# This will be later used in rcs_commit if a merge is required.
my ($file) = @_;
return git_sha1($file);
return git_sha1_file($file);
}
sub rcs_commit (@) {
@ -499,7 +538,7 @@ sub rcs_commit (@) {
# Check to see if the page has been changed by someone else since
# rcs_prepedit was called.
my $cur = git_sha1($params{file});
my $cur = git_sha1_file($params{file});
my ($prev) = $params{token} =~ /^($sha1_pattern)$/; # untaint
if (defined $cur && defined $prev && $cur ne $prev) {

View File

@ -287,9 +287,22 @@ sub srcdir_check () {
}
sub find_src_files () {
# Finds all files in the srcdir, and the underlaydirs.
# Returns the files, and their corresponding pages.
#
# When run in only_underlay mode, adds only the underlay files to
# the files and pages passed in.
sub find_src_files (;$$$) {
my $only_underlay=shift;
my @files;
if (defined $_[0]) {
@files=@{shift()};
}
my %pages;
if (defined $_[0]) {
%pages=%{shift()};
}
eval q{use File::Find};
error($@) if $@;
@ -298,6 +311,8 @@ sub find_src_files () {
my $origdir=getcwd();
my $abssrcdir=Cwd::abs_path($config{srcdir});
@IkiWiki::underlayfiles=();
my ($page, $underlay);
my $helper=sub {
my $file=decode_utf8($_);
@ -323,6 +338,7 @@ sub find_src_files () {
if (! -l "$abssrcdir/$f" && ! -e _) {
if (! $pages{$page}) {
push @files, $f;
push @IkiWiki::underlayfiles, $f;
$pages{$page}=1;
}
}
@ -336,12 +352,14 @@ sub find_src_files () {
}
};
unless ($only_underlay) {
chdir($config{srcdir}) || die "chdir $config{srcdir}: $!";
find({
no_chdir => 1,
wanted => $helper,
}, '.');
chdir($origdir) || die "chdir $origdir: $!";
}
$underlay=1;
foreach (@{$config{underlaydirs}}, $config{underlaydir}) {
@ -357,6 +375,50 @@ sub find_src_files () {
return \@files, \%pages;
}
# Fast path used in place of a full scan of the srcdir: builds the file
# list and the page set from a hash of changed files and a hash of deleted
# files. It should give the same results as find_src_files, with the same
# sanity checks, only a lot faster.
#
# Takes a reference to a hash keyed by changed file and a reference to a
# hash keyed by deleted file. Returns whatever find_src_files returns when
# it is asked to add only the underlay files to the lists built here.
sub process_changed_files ($$) {
	my ($changed_raw, $deleted_raw)=@_;

	my (@files, %pages);

	for my $file (keys %$changed_raw) {
		my $page=pagename($file);
		# A pruned file is only skipped when it does not belong to a
		# page that is already known.
		if (! exists $pagesources{$page}) {
			next if file_pruned($file);
		}

		my ($untainted) = $file =~ /$config{wiki_file_regexp}/; # untaint
		unless (defined $untainted) {
			warn(sprintf(gettext("skipping bad filename %s"), $file)."\n");
			next;
		}

		push @files, $untainted;
		debug(sprintf(gettext("%s has multiple possible source pages"), $page))
			if $pages{$page};
		$pages{$page}=1;
	}

	# Up to here only the changed files are covered. Carry over every
	# previously known source file that was neither changed nor deleted,
	# leaving out the ones that came from the underlay.
	my %old_underlay=map { ($_ => 1) } @IkiWiki::underlayfiles;
	for my $page (keys %pagesources) {
		my $source=$pagesources{$page};
		next if $old_underlay{$source};
		next if exists $pages{$page};
		next if exists $deleted_raw->{$source};
		$pages{$page}=1;
		push @files, $source;
	}

	# Finally add in the underlay.
	return find_src_files(1, \@files, \%pages);
}
sub find_new_files ($) {
my $files=shift;
my @new;
@ -762,14 +824,32 @@ sub gen_autofile ($$$) {
return 1;
}
# True when the only_committed_changes setting is enabled and the rcs
# hooks rcs_find_changes and rcs_get_current_rev are both registered.
sub want_find_changes {
	return $config{only_committed_changes}
		&& exists $IkiWiki::hooks{rcs}{rcs_find_changes}
		&& exists $IkiWiki::hooks{rcs}{rcs_get_current_rev};
}
sub refresh () {
srcdir_check();
run_hooks(refresh => sub { shift->() });
my ($files, $pages)=find_src_files();
my ($new, $internal_new)=find_new_files($files);
my ($del, $internal_del)=find_del_files($pages);
my ($changed, $internal_changed)=find_changed($files);
my ($files, $pages, $new, $internal_new, $del, $internal_del, $changed, $internal_changed);
if (! $config{rebuild} && want_find_changes() && defined $IkiWiki::lastrev) {
my ($changed_raw, $del_raw);
($changed_raw, $del_raw, $IkiWiki::lastrev) = $IkiWiki::hooks{rcs}{rcs_find_changes}{call}->($IkiWiki::lastrev);
($files, $pages)=process_changed_files($changed_raw, $del_raw);
}
else {
($files, $pages)=find_src_files();
}
if (want_find_changes()) {
if (! defined($IkiWiki::lastrev)) {
$IkiWiki::lastrev=$IkiWiki::hooks{rcs}{rcs_get_current_rev}{call}->();
}
}
($new, $internal_new)=find_new_files($files);
($del, $internal_del)=find_del_files($pages);
($changed, $internal_changed)=find_changed($files);
my %existingfiles;
run_hooks(needsbuild => sub {
my $ret=shift->($changed, [@$del, @$internal_del]);

6
debian/changelog vendored
View File

@ -5,7 +5,11 @@ ikiwiki (3.20130904.2) UNRELEASED; urgency=low
* Fixed unnecessary tight loop hash copy in saveindex where a pointer
can be used instead. Can speed up refreshes by nearly 50% in some
circumstances.
* Optmised loadindex by caching the page name in the index.
* Optimized loadindex by caching the page name in the index.
* Added only_committed_changes config setting, which speeds up wiki
refresh by querying git to find the files that were changed, rather
than looking at the work tree. Not enabled by default as it can
break some setups where not all files get committed to git.
-- Joey Hess <joeyh@debian.org> Thu, 05 Sep 2013 10:01:10 -0400

View File

@ -1259,6 +1259,20 @@ and an error message on failure.
This hook and `rcs_preprevert` are optional, if not implemented, no revert
web interface will be available.
### `rcs_find_changes($)`
Finds changes committed since the passed RCS-specific rev. Returns
a reference to a hash of the files changed, a reference to a hash of
the files deleted, and the current rev.
This hook is optional.
### `rcs_get_current_rev()`
Gets an RCS-specific rev, which can later be passed to `rcs_find_changes`.
This hook is optional.
### PageSpec plugins
It's also possible to write plugins that add new functions to

View File

@ -23,3 +23,5 @@ to be scanned as now, as would the transient directory. Since some sites
may depend on files being put into the tree and not committed, this
optimisation would probably need to be something that can be optionally
enabled.
> [[done]] --[[Joey]]

View File

@ -1,5 +1,5 @@
Name: ikiwiki
Version: 3.20130904.1
Version: 3.20130904.2
Release: 1%{?dist}
Summary: A wiki compiler