From 654530fa8bb0937123ed526e3093170ef23f5295 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sat, 16 Nov 2013 17:26:20 -0400 Subject: [PATCH] Added only_committed_changes config setting, which speeds up wiki refresh by querying git to find the files that were changed, rather than looking at the work tree. Not enabled by default as it can break some setups where not all files get committed to git. --- IkiWiki.pm | 14 +++- IkiWiki/Plugin/git.pm | 51 +++++++++++-- IkiWiki/Render.pm | 102 ++++++++++++++++++++++--- debian/changelog | 6 +- doc/plugins/write.mdwn | 14 ++++ doc/todo/optimisation_via_git_log.mdwn | 2 + ikiwiki.spec | 2 +- 7 files changed, 171 insertions(+), 20 deletions(-) diff --git a/IkiWiki.pm b/IkiWiki.pm index af8e07d7f..b7080bb0b 100644 --- a/IkiWiki.pm +++ b/IkiWiki.pm @@ -14,7 +14,7 @@ use vars qw{%config %links %oldlinks %pagemtime %pagectime %pagecase %pagestate %wikistate %renderedfiles %oldrenderedfiles %pagesources %delpagesources %destsources %depends %depends_simple @mass_depends %hooks %forcerebuild %loaded_plugins %typedlinks - %oldtypedlinks %autofiles}; + %oldtypedlinks %autofiles @underlayfiles $lastrev}; use Exporter q{import}; our @EXPORT = qw(hook debug error htmlpage template template_depends @@ -134,6 +134,13 @@ sub getsetup () { safe => 1, rebuild => 0, }, + only_committed_changes => { + type => "boolean", + default => 0, + description => "enable optimization of only refreshing committed changes?", + safe => 1, + rebuild => 0, + }, rcs => { type => "string", default => '', @@ -1881,6 +1888,8 @@ sub loadindex () { foreach my $page (keys %renderedfiles) { $destsources{$_}=$page foreach @{$renderedfiles{$page}}; } + $lastrev=$index->{lastrev}; + @underlayfiles=@{$index->{underlayfiles}} if ref $index->{underlayfiles}; return close($in); } @@ -1934,6 +1943,9 @@ sub saveindex () { } } + $index{lastrev}=$lastrev; + $index{underlayfiles}=\@underlayfiles; + $index{version}="3"; my $ret=Storable::nstore_fd(\%index, $out); return if ! 
defined $ret || ! $ret; diff --git a/IkiWiki/Plugin/git.pm b/IkiWiki/Plugin/git.pm index bbdcbdba8..1dbf00d55 100644 --- a/IkiWiki/Plugin/git.pm +++ b/IkiWiki/Plugin/git.pm @@ -29,6 +29,8 @@ sub import { hook(type => "rcs", id => "rcs_receive", call => \&rcs_receive); hook(type => "rcs", id => "rcs_preprevert", call => \&rcs_preprevert); hook(type => "rcs", id => "rcs_revert", call => \&rcs_revert); + hook(type => "rcs", id => "rcs_find_changes", call => \&rcs_find_changes); + hook(type => "rcs", id => "rcs_get_current_rev", call => \&rcs_get_current_rev); } sub checkconfig () { @@ -462,19 +464,56 @@ sub git_commit_info ($;$) { return wantarray ? @ci : $ci[0]; } -sub git_sha1 (;$) { - # Return head sha1sum (of given file). - my $file = shift || q{--}; +sub rcs_find_changes ($) { + my $oldrev=shift; + my @raw_lines = run_or_die('git', 'log', + '--pretty=raw', '--raw', '--abbrev=40', '--always', '-c', + '--no-renames', '--reverse', + '-r', "$oldrev..HEAD", '--', '.'); + + # Due to --reverse, we see changes in chronological order, so the last state seen for a file wins. + my %changed; + my %deleted; + my $nullsha = 0 x 40; + my $newrev=$oldrev; # stays at $oldrev when no commits follow it + while (my $ci = parse_diff_tree(\@raw_lines)) { + $newrev=$ci->{sha1}; + foreach my $i (@{$ci->{details}}) { + my $file=$i->{file}; + if ($i->{sha1_to} eq $nullsha) { # sha1s are hex strings; compare as strings, not numbers + delete $changed{$file}; + $deleted{$file}=1; + } + else { + delete $deleted{$file}; + $changed{$file}=1; + } + } + } + + return (\%changed, \%deleted, $newrev); +} + +sub git_sha1_file ($) { + my $file=shift; + git_sha1($file); # git_sha1 already supplies the "--" separator +} + +sub git_sha1 (@) { # Ignore error since a non-existing file might be given. my ($sha1) = run_or_non('git', 'rev-list', '--max-count=1', 'HEAD', - '--', $file); + '--', @_); if (defined $sha1) { ($sha1) = $sha1 =~ m/($sha1_pattern)/; # sha1 is untainted now } return defined $sha1 ? $sha1 : ''; } +sub rcs_get_current_rev () { + git_sha1(); +} + sub rcs_update () { # Update working directory. 
@@ -488,7 +527,7 @@ sub rcs_prepedit ($) { # This will be later used in rcs_commit if a merge is required. my ($file) = @_; - return git_sha1($file); + return git_sha1_file($file); } sub rcs_commit (@) { @@ -499,7 +538,7 @@ sub rcs_commit (@) { # Check to see if the page has been changed by someone else since # rcs_prepedit was called. - my $cur = git_sha1($params{file}); + my $cur = git_sha1_file($params{file}); my ($prev) = $params{token} =~ /^($sha1_pattern)$/; # untaint if (defined $cur && defined $prev && $cur ne $prev) { diff --git a/IkiWiki/Render.pm b/IkiWiki/Render.pm index 58b374ad7..60424bbda 100644 --- a/IkiWiki/Render.pm +++ b/IkiWiki/Render.pm @@ -287,9 +287,22 @@ sub srcdir_check () { } -sub find_src_files () { +# Finds all files in the srcdir, and the underlaydirs. +# Returns the files, and their corresponding pages. +# +# When run in only_underlay mode, adds only the underlay files to +# the files and pages passed in. +sub find_src_files (;$$$) { + my $only_underlay=shift; my @files; + if (defined $_[0]) { + @files=@{shift()}; + } my %pages; + if (defined $_[0]) { + %pages=%{shift()}; + } + eval q{use File::Find}; error($@) if $@; @@ -297,6 +310,8 @@ sub find_src_files () { die $@ if $@; my $origdir=getcwd(); my $abssrcdir=Cwd::abs_path($config{srcdir}); + + @IkiWiki::underlayfiles=(); my ($page, $underlay); my $helper=sub { @@ -323,6 +338,7 @@ sub find_src_files () { if (! -l "$abssrcdir/$f" && ! -e _) { if (! 
$pages{$page}) { push @files, $f; + push @IkiWiki::underlayfiles, $f; $pages{$page}=1; } } @@ -336,12 +352,14 @@ sub find_src_files () { } }; - chdir($config{srcdir}) || die "chdir $config{srcdir}: $!"; - find({ - no_chdir => 1, - wanted => $helper, - }, '.'); - chdir($origdir) || die "chdir $origdir: $!"; + unless ($only_underlay) { + chdir($config{srcdir}) || die "chdir $config{srcdir}: $!"; + find({ + no_chdir => 1, + wanted => $helper, + }, '.'); + chdir($origdir) || die "chdir $origdir: $!"; + } $underlay=1; foreach (@{$config{underlaydirs}}, $config{underlaydir}) { @@ -357,6 +375,50 @@ sub find_src_files () { return \@files, \%pages; } +# Given a hash of files that have changed, and a hash of files that were +# deleted, should return the same results as find_src_files, with the same +# sanity checks. But a lot faster! +sub process_changed_files ($$) { + my $changed_raw=shift; + my $deleted_raw=shift; + + my @files; + my %pages; + + foreach my $file (keys %$changed_raw) { + my $page = pagename($file); + next if ! exists $pagesources{$page} && file_pruned($file); + my ($f) = $file =~ /$config{wiki_file_regexp}/; # untaint + if (! defined $f) { + warn(sprintf(gettext("skipping bad filename %s"), $file)."\n"); + next; + } + push @files, $f; + if ($pages{$page}) { + debug(sprintf(gettext("%s has multiple possible source pages"), $page)); + } + $pages{$page}=1; + } + + # So far, we only have the changed files. Now add in all the old + # files that were not changed or deleted, excluding ones that came + # from the underlay. 
+ my %old_underlay; + foreach my $f (@IkiWiki::underlayfiles) { + $old_underlay{$f}=1; + } + foreach my $page (keys %pagesources) { + my $f=$pagesources{$page}; + unless ($old_underlay{$f} || exists $pages{$page} || exists $deleted_raw->{$f}) { + $pages{$page}=1; + push @files, $f; + } + } + + # add in the underlay + find_src_files(1, \@files, \%pages); +} + sub find_new_files ($) { my $files=shift; my @new; @@ -762,14 +824,32 @@ sub gen_autofile ($$$) { return 1; } +sub want_find_changes { + $config{only_committed_changes} && + exists $IkiWiki::hooks{rcs}{rcs_find_changes} && + exists $IkiWiki::hooks{rcs}{rcs_get_current_rev} +} sub refresh () { srcdir_check(); run_hooks(refresh => sub { shift->() }); - my ($files, $pages)=find_src_files(); - my ($new, $internal_new)=find_new_files($files); - my ($del, $internal_del)=find_del_files($pages); - my ($changed, $internal_changed)=find_changed($files); + my ($files, $pages, $new, $internal_new, $del, $internal_del, $changed, $internal_changed); + if (! $config{rebuild} && want_find_changes() && defined $IkiWiki::lastrev) { + my ($changed_raw, $del_raw); + ($changed_raw, $del_raw, $IkiWiki::lastrev) = $IkiWiki::hooks{rcs}{rcs_find_changes}{call}->($IkiWiki::lastrev); + ($files, $pages)=process_changed_files($changed_raw, $del_raw); + } + else { + ($files, $pages)=find_src_files(); + } + if (want_find_changes()) { + if (! 
defined($IkiWiki::lastrev)) { + $IkiWiki::lastrev=$IkiWiki::hooks{rcs}{rcs_get_current_rev}{call}->(); + } + } + ($new, $internal_new)=find_new_files($files); + ($del, $internal_del)=find_del_files($pages); + ($changed, $internal_changed)=find_changed($files); my %existingfiles; run_hooks(needsbuild => sub { my $ret=shift->($changed, [@$del, @$internal_del]); diff --git a/debian/changelog b/debian/changelog index 0f6a44c30..e7856c01b 100644 --- a/debian/changelog +++ b/debian/changelog @@ -5,7 +5,11 @@ ikiwiki (3.20130904.2) UNRELEASED; urgency=low * Fixed unncessary tight loop hash copy in saveindex where a pointer can be used instead. Can speed up refreshes by nearly 50% in some circumstances. - * Optmised loadindex by caching the page name in the index. + * Optimized loadindex by caching the page name in the index. + * Added only_committed_changes config setting, which speeds up wiki + refresh by querying git to find the files that were changed, rather + than looking at the work tree. Not enabled by default as it can + break some setups where not all files get committed to git. -- Joey Hess Thu, 05 Sep 2013 10:01:10 -0400 diff --git a/doc/plugins/write.mdwn b/doc/plugins/write.mdwn index d6e6d8d1e..d2d1a6329 100644 --- a/doc/plugins/write.mdwn +++ b/doc/plugins/write.mdwn @@ -1259,6 +1259,20 @@ and an error message on failure. This hook and `rcs_preprevert` are optional, if not implemented, no revert web interface will be available. +### `rcs_find_changes($)` + +Finds changes committed since the passed RCS-specific rev. Returns +a reference to a hash of the files changed, a reference to a hash of +the files deleted, and the current rev. + +This hook is optional. + +### `rcs_get_current_rev()` + +Gets the current RCS-specific rev, which can later be passed to `rcs_find_changes`. + +This hook is optional. 
+ ### PageSpec plugins It's also possible to write plugins that add new functions to diff --git a/doc/todo/optimisation_via_git_log.mdwn b/doc/todo/optimisation_via_git_log.mdwn index ad227920b..469681a99 100644 --- a/doc/todo/optimisation_via_git_log.mdwn +++ b/doc/todo/optimisation_via_git_log.mdwn @@ -23,3 +23,5 @@ to be scanned as now, as would the transient directory. Since some sites may depend on files being put into the tree and not committed, this optimisation would probably need to be something that can be optionally enabled. + +> [[done]] --[[Joey]] diff --git a/ikiwiki.spec b/ikiwiki.spec index 1a54cc231..d23dd4632 100644 --- a/ikiwiki.spec +++ b/ikiwiki.spec @@ -1,5 +1,5 @@ Name: ikiwiki -Version: 3.20130904.1 +Version: 3.20130904.2 Release: 1%{?dist} Summary: A wiki compiler