diff --git a/IkiWiki.pm b/IkiWiki.pm index c0b26e43f..b5707195b 100644 --- a/IkiWiki.pm +++ b/IkiWiki.pm @@ -11,7 +11,7 @@ use Memoize; memoize("abs2rel"); use vars qw{%config %links %oldlinks %oldpagemtime %pagectime - %renderedfiles %pagesources %depends %hooks}; + %renderedfiles %pagesources %depends %hooks %forcerebuild}; sub defaultconfig () { #{{{ wiki_file_prune_regexp => qr{((^|/).svn/|\.\.|^\.|\/\.|\.html?$|\.rss$)}, diff --git a/IkiWiki/Plugin/aggregate.pm b/IkiWiki/Plugin/aggregate.pm new file mode 100644 index 000000000..5c8151b58 --- /dev/null +++ b/IkiWiki/Plugin/aggregate.pm @@ -0,0 +1,300 @@ +#!/usr/bin/perl +# Blog aggregation plugin. +package IkiWiki::Plugin::aggregate; + +use warnings; +use strict; +use IkiWiki; + +my %feeds; +my %guids; + +sub import { #{{{ + IkiWiki::hook(type => "getopt", id => "aggregate", + call => \&getopt); + IkiWiki::hook(type => "checkconfig", id => "aggregate", + call => \&checkconfig); + IkiWiki::hook(type => "filter", id => "aggregate", + call => \&filter); + IkiWiki::hook(type => "preprocess", id => "aggregate", + call => \&preprocess); + IkiWiki::hook(type => "delete", id => "aggregate", + call => \&delete); + IkiWiki::hook(type => "savestate", id => "aggregate", + call => \&savestate); +} # }}} + +sub getopt () { #{{{ + eval q{use Getopt::Long}; + Getopt::Long::Configure('pass_through'); + GetOptions("aggregate" => \$IkiWiki::config{aggregate}); +} #}}} + +sub checkconfig () { #{{{ + loadstate(); + if ($IkiWiki::config{aggregate}) { + IkiWiki::loadindex(); + aggregate(); + savestate(); + } +} #}}} + +sub filter (@) { #{{{ + my %params=@_; + my $page=$params{page}; + + # Mark all feeds originating on this page as removable; + # preprocess will unmark those that still exist. + remove_feeds($page); + + return $params{content}; +} # }}} + +sub preprocess (@) { #{{{ + my %params=@_; + + foreach my $required (qw{name url dir}) { + if (! 
exists $params{$required}) { + return "[[aggregate plugin missing $required parameter]]"; + } + } + + my $feed={}; + my $name=$params{name}; + if (exists $feeds{$name}) { + $feed=$feeds{$name}; + } + else { + $feeds{$name}=$feed; + } + $feed->{name}=$name; + $feed->{sourcepage}=$params{page}; + $feed->{url}=$params{url}; + ($feed->{dir})=$params{dir}=~/$IkiWiki::config{wiki_file_regexp}/; + $feed->{dir}=~s/^\/+//; + $feed->{feedurl}=defined $params{feedurl} ? $params{feedurl} : $params{url}; + $feed->{updateinterval}=defined $params{updateinterval} ? $params{updateinterval} : 15; + $feed->{expireage}=defined $params{expireage} ? $params{expireage} : 0; + $feed->{expirecount}=defined $params{expirecount} ? $params{expirecount} : 0; + delete $feed->{remove}; + $feed->{lastupdate}=0 unless defined $feed->{lastupdate}; + $feed->{numposts}=0 unless defined $feed->{numposts}; + $feed->{newposts}=0 unless defined $feed->{newposts}; + $feed->{message}="new feed" unless defined $feed->{message}; + while (@_) { + my $key=shift; + my $value=shift; + if ($key eq 'tag') { + push @{$feed->{tags}}, $value; + } + } + + return "{url}."\">".$feed->{name}.": ". + "".$feed->{message}." (".$feed->{numposts}. + " stored posts; ".$feed->{newposts}." new)"; +} # }}} + +sub delete (@) { #{{{ + my @files=@_; + + # Remove feed data for removed pages. 
+ foreach my $file (@files) { + my $page=IkiWiki::pagename($file); + remove_feeds($page); + } +} #}}} + +sub loadstate () { #{{{ + if (-e "$IkiWiki::config{wikistatedir}/aggregate") { + open (IN, "$IkiWiki::config{wikistatedir}/aggregate" || + die "$IkiWiki::config{wikistatedir}/aggregate: $!"); + while () { + $_=IkiWiki::possibly_foolish_untaint($_); + chomp; + my $data={}; + foreach my $i (split(/ /, $_)) { + my ($field, $val)=split(/=/, $i, 2); + if ($field eq "name" || $field eq "message") { + $data->{$field}=IkiWiki::pagetitle($val); + } + elsif ($field eq "tag") { + push @{$data->{tags}}, $val; + } + else { + $data->{$field}=$val; + } + } + + if (exists $data->{name}) { + $feeds{$data->{name}}=$data; + } + elsif (exists $data->{guid}) { + $guids{$data->{guid}}=$data; + } + } + + close IN; + } +} #}}} + +sub savestate () { #{{{ + open (OUT, ">$IkiWiki::config{wikistatedir}/aggregate" || + die "$IkiWiki::config{wikistatedir}/aggregate: $!"); + foreach my $data (values %feeds, values %guids) { + if ($data->{remove}) { + if ($data->{name}) { + foreach my $guid (values %guids) { + if ($guid->{feed} eq $data->{name}) { + $guid->{remove}=1; + } + } + } + else { + unlink pagefile($data->{page}); + } + next; + } + + my @line; + foreach my $field (keys %$data) { + if ($field eq "name" || $field eq "message") { + push @line, "$field=".IkiWiki::titlepage($data->{$field}); + } + elsif ($field eq "tags") { + push @line, "tag=$_" foreach @{$data->{tags}}; + } + else { + push @line, "$field=".$data->{$field}; + } + } + print OUT join(" ", @line)."\n"; + } + close OUT; +} #}}} + +sub aggregate () { #{{{ + eval q{use XML::Feed}; + die $@ if $@; + eval q{use HTML::Entities}; + die $@ if $@; + +FEED: foreach my $feed (values %feeds) { + # TODO: check updateinterval + $feed->{lastupdate}=time; + $feed->{newposts}=0; + $IkiWiki::forcerebuild{$feed->{sourcepage}}=1; + + IkiWiki::debug("checking feed ".$feed->{name}." 
..."); + + my @urls=XML::Feed->find_feeds($feed->{feedurl}); + if (! @urls) { + $feed->{message}="could not find feed at ".$feed->{feedurl}; + IkiWiki::debug($feed->{message}); + } + foreach my $url (@urls) { + my $f=XML::Feed->parse(URI->new($url)); + if (! $f) { + $feed->{message}=XML::Feed->errstr; + IkiWiki::debug($feed->{message}); + next FEED; + } + + foreach my $entry ($f->entries) { + add_page( + feed => $feed, + title => decode_entities($entry->title), + link => $entry->link, + content => $entry->content->body, + guid => defined $entry->id ? $entry->id : time."_".$feed->name, + ctime => $entry->issued ? ($entry->issued->epoch || time) : time, + ); + } + } + $feed->{message}="processed ok"; + } + + # TODO: expiry +} #}}} + +sub add_page (@) { #{{{ + my %params=@_; + + my $feed=$params{feed}; + my $guid={}; + my $mtime; + if (exists $guids{$params{guid}}) { + # updating an existing post + $guid=$guids{$params{guid}}; + } + else { + # new post + $guid->{guid}=$params{guid}; + $guids{$params{guid}}=$guid; + $mtime=$params{ctime}; + $feed->{numposts}++; + $feed->{newposts}++; + + # assign it an unused page + my $page=$feed->{dir}."/".IkiWiki::titlepage($params{title}); + ($page)=$page=~/$IkiWiki::config{wiki_file_regexp}/; + if (! defined $page || ! length $page) { + $page=$feed->{dir}."/item"; + } + $page=~s/\.\.//g; # avoid ".." directory tricks + my $c=""; + while (exists $IkiWiki::pagesources{$page.$c} || + -e pagefile($page.$c)) { + $c++ + } + $guid->{page}=$page; + IkiWiki::debug("creating new page $page"); + } + $guid->{feed}=$feed->{name}; + + # To write or not to write? Need to avoid writing unchanged pages + # to avoid unneccessary rebuilding. The mtime from rss cannot be + # trusted; let's use a digest. + eval q{use Digest::MD5 'md5_hex'}; + my $digest=md5_hex($params{content}); + return unless ! exists $guid->{md5} || $guid->{md5} ne $digest; + $guid->{md5}=$digest; + + # Create the page. 
+ my $template=IkiWiki::template("aggregatepost.tmpl", blind_cache => 1); + my $content=$params{content}; + $params{content}=~s/(?param(content => $params{content}); + $template->param(url => $feed->{url}); + $template->param(name => $feed->{name}); + $template->param(link => $params{link}) if defined $params{link}; + if (ref $feed->{tags}) { + $template->param(tags => map { tag => $_ }, @{$feed->{tags}}); + } + IkiWiki::writefile($guid->{page}.".html", $IkiWiki::config{srcdir}, + $template->output); + + # Set the mtime, this lets the build process get the right creation + # time on record for the new page. + utime $mtime, $mtime, pagefile($guid->{page}) if defined $mtime; +} #}}} + +sub remove_feeds () { #{{{ + my $page=shift; + + my %removed; + foreach my $id (keys %feeds) { + if ($feeds{$id}->{sourcepage} eq $page) { + $feeds{$id}->{remove}=1; + $removed{$id}=1; + } + } +} #}}} + +sub pagefile ($) { #{{{ + my $page=shift; + + return "$IkiWiki::config{srcdir}/$page.html"; +} #}}} + +1 diff --git a/IkiWiki/Plugin/skeleton.pm b/IkiWiki/Plugin/skeleton.pm index e63bab6d7..1201d055c 100644 --- a/IkiWiki/Plugin/skeleton.pm +++ b/IkiWiki/Plugin/skeleton.pm @@ -29,7 +29,7 @@ sub import { #{{{ call => \&change); IkiWiki::hook(type => "cgi", id => "skeleton", call => \&cgi); - IkiWiki::hook(type => "cgi", id => "savestate", + IkiWiki::hook(type => "savestate", id => "savestate", call => \&savestate); } # }}} diff --git a/IkiWiki/Render.pm b/IkiWiki/Render.pm index 5dbb4654c..1449b8931 100644 --- a/IkiWiki/Render.pm +++ b/IkiWiki/Render.pm @@ -399,7 +399,8 @@ sub refresh () { #{{{ my $page=pagename($file); if (! 
exists $oldpagemtime{$page} || - mtime(srcfile($file)) > $oldpagemtime{$page}) { + mtime(srcfile($file)) > $oldpagemtime{$page} || + $forcerebuild{$page}) { debug("rendering $file"); render($file); $rendered{$file}=1; diff --git a/IkiWiki/Setup/Standard.pm b/IkiWiki/Setup/Standard.pm index b1418ae34..b76c87b8e 100644 --- a/IkiWiki/Setup/Standard.pm +++ b/IkiWiki/Setup/Standard.pm @@ -34,16 +34,18 @@ sub setup_standard { $config{wiki_file_prune_regexp}=qr/$config{wiki_file_prune_regexp}|$setup{exclude}/; } - debug("generating wrappers.."); - my @wrappers=@{$setup{wrappers}}; - delete $setup{wrappers}; - my %startconfig=(%config); - foreach my $wrapper (@wrappers) { - %config=(%startconfig, verbose => 0, %setup, %{$wrapper}); - checkconfig(); - gen_wrapper(); + if (! $config{refresh} || $config{wrappers}) { + debug("generating wrappers.."); + my @wrappers=@{$setup{wrappers}}; + delete $setup{wrappers}; + my %startconfig=(%config); + foreach my $wrapper (@wrappers) { + %config=(%startconfig, verbose => 0, %setup, %{$wrapper}); + checkconfig(); + gen_wrapper(); + } + %config=(%startconfig); } - %config=(%startconfig); foreach my $c (keys %setup) { if (defined $setup{$c}) { diff --git a/Makefile.PL b/Makefile.PL index 641964853..303116faa 100755 --- a/Makefile.PL +++ b/Makefile.PL @@ -18,7 +18,7 @@ extra_build: --plugin=brokenlinks --plugin=pagecount \ --plugin=orphans --plugin=haiku --plugin=meta \ --plugin=tag --plugin=polygen --plugin=pagestats \ - --plugin=fortune + --plugin=fortune --plugin=aggregate ./mdwn2man ikiwiki 1 doc/usage.mdwn > ikiwiki.man ./mdwn2man ikiwiki-mass-rebuild 8 doc/ikiwiki-mass-rebuild.mdwn > ikiwiki-mass-rebuild.man diff --git a/debian/changelog b/debian/changelog index babaefff2..6f33a3236 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,10 +1,16 @@ ikiwiki (1.13) UNRELEASED; urgency=low + * ikiwiki can now download and aggregate feeds with its new aggregate + plugin, so it's possible to implement a Planet using ikiwiki! 
* Add a run_hooks function for the common task of running all hooks of a given type. * Add a savestate hook. * Don't put blog post forms on pages if there's no cgiurl set. * Reformat front page. + * --setup --refresh no longer rebuilds wrappers. Use --setup --refresh + --wrappers to do that. + * Add %IkiWiki::forcerebuild to provide a way for plugins like aggregate + to update pages that haven't changed on disk. -- Joey Hess Sat, 29 Jul 2006 20:10:51 -0400 diff --git a/debian/control b/debian/control index aa8b3fa24..a556fa2ed 100644 --- a/debian/control +++ b/debian/control @@ -10,7 +10,7 @@ Package: ikiwiki Architecture: all Depends: ${perl:Depends}, libxml-simple-perl, markdown, libtimedate-perl, libhtml-template-perl, libhtml-scrubber-perl, libcgi-formbuilder-perl (>= 3.02.02), libtime-duration-perl, libcgi-session-perl, libmail-sendmail-perl, gcc | c-compiler, libc6-dev | libc-dev Recommends: subversion | git-core, hyperestraier -Suggests: viewcvs, librpc-xml-perl, libtext-wikiformat-perl, polygen, tidy +Suggests: viewcvs, librpc-xml-perl, libtext-wikiformat-perl, polygen, tidy, libxml-feed-perl, libhtml-parser-perl Description: a wiki compiler ikiwiki converts a directory full of wiki pages into html pages suitable for publishing on a website. Unlike many wikis, ikiwiki does not have its diff --git a/debian/postinst b/debian/postinst index 72b457716..4e2425fcb 100755 --- a/debian/postinst +++ b/debian/postinst @@ -10,5 +10,5 @@ if [ "$1" = configure ] && \ dpkg --compare-versions "$2" lt "$firstcompat"; then ikiwiki-mass-rebuild else - ikiwiki-mass-rebuild -refresh + ikiwiki-mass-rebuild -refresh -wrappers fi diff --git a/doc/features.mdwn b/doc/features.mdwn index a43cd1c9a..0a235d708 100644 --- a/doc/features.mdwn +++ b/doc/features.mdwn @@ -52,6 +52,10 @@ Some of ikiwiki's features: Ikiwiki's own [[TODO]], [[news]], and [[plugins]] pages are good examples of some of the flexible ways that this can be used. 
+ Ikiwiki can also [[plugin/aggregate]] external blogs, feeding them into + the wiki. This can be used to create a Planet type site that aggregates + interesting feeds. + * [[tags]] You can tag pages and use these tags in various ways. Tags will show diff --git a/doc/install.mdwn b/doc/install.mdwn index d23c88e41..eb5b91e67 100644 --- a/doc/install.mdwn +++ b/doc/install.mdwn @@ -4,7 +4,7 @@ Ikiwiki requires [[MarkDown]] be installed, and also uses the following perl modules if available: `CGI::Session` `CGI::FormBuilder` (version 3.02.02 or newer) `HTML::Template` `Mail::SendMail` `Time::Duration` `Date::Parse` (libtimedate-perl), `HTML::Scrubber`, `RPC::XML`, -`XML::Simple` +`XML::Simple`, `XML::Feed`, `HTML::Parser` If you want to install from the tarball, you should make sure that the required perl modules are installed, then run: diff --git a/doc/plugins.mdwn b/doc/plugins.mdwn index 0596fc068..1c04d09f5 100644 --- a/doc/plugins.mdwn +++ b/doc/plugins.mdwn @@ -1,7 +1,7 @@ Most of ikiwiki's [[features]] are implemented as plugins. Beyond the [[type/core]] features, there are plugins to [[type/format]] text, -use [[type/tags]], show [[type/meta]] information, or just have -[[type/fun]]. +use [[type/tags]], show [[type/meta]] information, do other [[type/useful]] +stuff, or just have [[type/fun]]. There's documentation if you want to [[write]] your own plugins, or you can install and use plugins contributed by others. diff --git a/doc/plugins/aggregate.mdwn b/doc/plugins/aggregate.mdwn new file mode 100644 index 000000000..690904f73 --- /dev/null +++ b/doc/plugins/aggregate.mdwn @@ -0,0 +1,53 @@ +This plugin allows content from other blogs to be aggregated into the wiki. 
+Aggregate a blog as follows: + + \[[aggregate name="example blog" feedurl="http://example.com/index.rss" url="http://example.com/" updateinterval="15" dir="example"]] + +That example aggregates posts from the specified RSS feed, updating no +more frequently than once every 15 minutes, and puts a page per post under +the example/ directory in the wiki. + +You can then use ikiwiki's [[blog]] support to create a blog of one or more +aggregated feeds. + +## setup + +Make sure that you have the [[html]] plugin enabled, as the created pages are +in html format. The [[tag]] plugin is also recommended. + +You will need to run ikiwiki periodically from a cron job, passing it the +--aggregate parameter, to make it check for new posts. Here's an example +crontab entry: + + */15 * * * * ikiwiki --setup my.wiki --aggregate --refresh + +## usage + +Here are descriptions of all the supported parameters to the `aggregate` +directive: + +* `name` - A name for the feed. Each feed must have a unique name. + Required. +* `url` - The url to the web page for the blog that's being aggregated. + Required. +* `dir` - The directory in the wiki where pages should be saved. Required. +* `feedurl` - The url to the feed. Optional, if it's not specified ikiwiki + will look for feeds on the `url`. RSS and atom feeds are supported. +* `updateinterval` - How often to check for new posts, in minutes. Default + is 15 minutes. +* `expireage` - Expire old items from this blog if they are older than + a specified number of days. Default is to never expire on age. +* `expirecount` - Expire old items from this blog if there are more than + the specified number total. Oldest items will be expired first. Default + is to never expire on count. +* `tag` - A tag to tag each post from the blog with. A good tag to use is + the name of the blog. Can be repeated multiple times. The [[tag]] plugin + must be enabled for this to work. 
+ +Note that even if you are using subversion or another revision control +system, pages created by aggregation will *not* be checked into revision +control. + +This plugin is not enabled by default. + +[[tag type/useful]] diff --git a/doc/plugins/search.mdwn b/doc/plugins/search.mdwn index d4a6b4efe..78088aed8 100644 --- a/doc/plugins/search.mdwn +++ b/doc/plugins/search.mdwn @@ -4,3 +4,5 @@ full text search to ikiwiki, using the [[HyperEstraier]] engine. It's possible to configure HyperEstraier via one of ikiwiki's [[templates]], but for most users, no configuration should be needed aside from enabling the plugin. + +[[tag type/useful]] diff --git a/doc/plugins/type/useful.mdwn b/doc/plugins/type/useful.mdwn new file mode 100644 index 000000000..92fcf5af1 --- /dev/null +++ b/doc/plugins/type/useful.mdwn @@ -0,0 +1 @@ +These plugins perform various miscellaneous useful functions. diff --git a/doc/plugins/write.mdwn b/doc/plugins/write.mdwn index 025a242a6..925717777 100644 --- a/doc/plugins/write.mdwn +++ b/doc/plugins/write.mdwn @@ -96,7 +96,7 @@ make arbitrary changes. The function is passed named parameters `page` and ## htmlize - IkiWiki::hook(type => "htmlize", id => "ext", call => \&filter); + IkiWiki::hook(type => "htmlize", id => "ext", call => \&htmlize); Runs on the raw source of a page and turns it into html. The id parameter specifies the filename extension that a file must have to be htmlized using @@ -135,7 +135,7 @@ content. ## delete - IkiWiki::hook(type => "delete", id => "foo", call => \&dele); + IkiWiki::hook(type => "delete", id => "foo", call => \&delete); Each time a page or pages is removed from the wiki, the referenced function is called, and passed the names of the source files that were removed. 
@@ -190,6 +190,8 @@ use the following hashes, using a page name as the key: Many plugins will need to add dependencies to this hash; the best way to do it is by using the IkiWiki::add_depends function, which takes as its parameters the page name and a [[GlobList]] of dependencies to add. +* `%IkiWiki::forcerebuild` any pages set as the keys to this hash will be + treated as if they're modified and rebuilt. # A note on generating html links diff --git a/doc/templates.mdwn b/doc/templates.mdwn index 10f715d9d..e500638f4 100644 --- a/doc/templates.mdwn +++ b/doc/templates.mdwn @@ -26,6 +26,8 @@ It ships with some basic templates which can be customised: can read the [[HyperEstraier]] docs and configure it using this. * `blogpost.tmpl` - Used for a form to add a post to a blog (and a rss link) * `rsslink.tmpl` - Used to add a rss link if blogpost.tmpl is not used. +* `aggregatepost.tmpl` - Used by the [[plugins/aggregate]] plugin to create + a page for a post. If you like, you can add these to further customise it: diff --git a/doc/todo/aggregation.mdwn b/doc/todo/aggregation.mdwn index 7d765f9e9..53b3133e2 100644 --- a/doc/todo/aggregation.mdwn +++ b/doc/todo/aggregation.mdwn @@ -1,24 +1 @@ -Here's a scary idea.. A plugin that can aggregate feeds from other -locations. Presumably there would need to be a cron job to build the wiki -periodically, and each time it's built any new items would be turned into -pages etc. There might also need to be a way to expire old items, unless -you wanted to keep them forever. - -This would allow ikiwiki to work as a kind of a planet, or at least a -poor-man's news aggregator. - -* XML::Feed has a very nice interface, may require valid feeds though. -* How to store GUIDs? Maybe as meta tags on pages, although that would need - caching of such metadata somewhere. -* How to configure which feeds to pull, how often, and where to put the - pulled entries? 
One way would be command line/config file, but I think - better would be to use preprocessor directives in a wiki page, probably - the same page that inlines all the pages together. -* Where to store when a feed was last pulled? - -So I need: - -* A way to store info from the preprocessor directives about what pages - to pull and expiry. -* A way to store info on last pull time, guids, etc. -* Switch for a mode that a) pulls b) expires old c) rebuilds wiki (for cron) +* Still need to support feed expiry. diff --git a/doc/usage.mdwn b/doc/usage.mdwn index 691880a96..a6ded5ec2 100644 --- a/doc/usage.mdwn +++ b/doc/usage.mdwn @@ -24,8 +24,7 @@ These options control the mode that ikiwiki is operating in. * --refresh Refresh the wiki, updating any changed pages. This is the default - behavior so you don't normally need to specify it. If used with -setup also - updates any configured wrappers. + behavior so you don't normally need to specify it. * --rebuild @@ -59,11 +58,19 @@ These options control the mode that ikiwiki is operating in. The default action when --setup is specified is to automatically generate wrappers for a wiki based on data in a config file, and rebuild the wiki. - If you also pass the --refresh option, ikiwiki will instead just refresh - the wiki described in the setup file. [[ikiwiki.setup]] is an example of such a config file. +* --wrappers + + If used with --setup --refresh, this makes it also update any configured + wrappers. + +* --aggregate + + If the aggregate plugin is enabled, this makes ikiwiki poll configured + feeds and save new posts to the srcdir. + # CONFIG OPTIONS These options configure the wiki. Note that plugins can add additional diff --git a/ikiwiki b/ikiwiki index 28eba6f64..6518b8626 100755 --- a/ikiwiki +++ b/ikiwiki @@ -22,6 +22,7 @@ sub getconfig () { #{{{ "verbose|v!" => \$config{verbose}, "rebuild!" => \$config{rebuild}, "refresh!" => \$config{refresh}, + "wrappers!" 
=> \$config{wrappers}, "getctime" => \$config{getctime}, "wrappermode=i" => \$config{wrappermode}, "rcs=s" => \$config{rcs}, diff --git a/templates/aggregatepost.tmpl b/templates/aggregatepost.tmpl new file mode 100644 index 000000000..4dfedf53d --- /dev/null +++ b/templates/aggregatepost.tmpl @@ -0,0 +1,13 @@ + +
+

+ +From + +; permalink + + +

+ +[[]] +