#!/usr/bin/perl
# Feed aggregation plugin.
package IkiWiki::Plugin::aggregate;

use warnings;
use strict;
use IkiWiki 3.00;
use HTML::Parser;
use HTML::Tagset;
use HTML::Entities;
use open qw{:utf8 :std};

my %feeds;
my %guids;
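
# %feeds is keyed by feed name and holds per-feed settings and status
# (url, feedurl, dir, template, update/expiry settings, last check time,
# status message). %guids is keyed by post guid and records the page each
# aggregated post was written to, its md5, and whether it has expired.
# Both hashes are persisted across runs by loadstate()/savestate().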

sub import {
	hook(type => "getopt", id => "aggregate", call => \&getopt);
	hook(type => "getsetup", id => "aggregate", call => \&getsetup);
	hook(type => "checkconfig", id => "aggregate", call => \&checkconfig,
		last => 1);
	hook(type => "needsbuild", id => "aggregate", call => \&needsbuild);
	hook(type => "preprocess", id => "aggregate", call => \&preprocess);
	hook(type => "delete", id => "aggregate", call => \&delete);
	hook(type => "savestate", id => "aggregate", call => \&savestate);
	hook(type => "htmlize", id => "_aggregated", call => \&htmlize);
	if (exists $config{aggregate_webtrigger} && $config{aggregate_webtrigger}) {
		hook(type => "cgi", id => "aggregate", call => \&cgi);
	}
}
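
# Command-line options: --aggregate triggers an aggregation run when
# ikiwiki is invoked from the command line; --aggregateinternal and
# --no-aggregateinternal toggle aggregation to internal pages.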

sub getopt () {
	eval q{use Getopt::Long};
	error($@) if $@;
	Getopt::Long::Configure('pass_through');
	GetOptions(
		"aggregate" => \$config{aggregate},
		"aggregateinternal!" => \$config{aggregateinternal},
	);
}

sub getsetup () {
	return
		plugin => {
			safe => 1,
			rebuild => undef,
		},
		aggregateinternal => {
			type => "boolean",
			example => 1,
			description => "enable aggregation to internal pages?",
			safe => 0, # enabling needs manual transition
			rebuild => 0,
		},
		aggregate_webtrigger => {
			type => "boolean",
			example => 0,
			description => "allow aggregation to be triggered via the web?",
			safe => 1,
			rebuild => 0,
		},
}

sub checkconfig () {
	if (! defined $config{aggregateinternal}) {
		$config{aggregateinternal}=1;
	}

	# This is done here rather than in a refresh hook because it
	# needs to run before the wiki is locked.
	if ($config{aggregate} && ! ($config{post_commit} &&
	                             IkiWiki::commit_hook_enabled())) {
		launchaggregation();
	}
}
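
# CGI entry point for the optional webtrigger. With aggregate_webtrigger
# enabled, an aggregation run can be requested over the web; illustrative
# URL (the real one depends on the wiki's cgiurl):
#   http://example.com/ikiwiki.cgi?do=aggregate_webtrigger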

sub cgi ($) {
	my $cgi=shift;

	if (defined $cgi->param('do') &&
	    $cgi->param("do") eq "aggregate_webtrigger") {
		$|=1;
		print "Content-Type: text/plain\n\n";
		$config{cgi}=0;
		$config{verbose}=1;
		$config{syslog}=0;
		print gettext("Aggregation triggered via web.")."\n\n";
		if (launchaggregation()) {
			IkiWiki::lockwiki();
			IkiWiki::loadindex();
			require IkiWiki::Render;
			IkiWiki::refresh();
			IkiWiki::saveindex();
		}
		else {
			print gettext("Nothing to do right now, all feeds are up-to-date!")."\n";
		}
		exit 0;
	}
}
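
# launchaggregation() is the common entry point for the command line and
# the webtrigger: it takes the aggregation lock, marks feed source pages
# for rebuild, and forks a child that fetches the feeds and saves the
# merged state before the normal wiki build proceeds.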

sub launchaggregation () {
	# See if any feeds need aggregation.
	loadstate();
	my @feeds=needsaggregate();
	return unless @feeds;
	if (! lockaggregate()) {
		error("an aggregation process is already running");
	}
	# force a later rebuild of source pages
	$IkiWiki::forcerebuild{$_->{sourcepage}}=1
		foreach @feeds;

	# Fork a child process to handle the aggregation.
	# The parent process will then handle building the
	# result. This avoids messy code to clear state
	# accumulated while aggregating.
	defined(my $pid = fork) or error("Can't fork: $!");
	if (! $pid) {
		IkiWiki::loadindex();
		# Aggregation happens without the main wiki lock
		# being held. This allows editing pages etc while
		# aggregation is running.
		aggregate(@feeds);

		IkiWiki::lockwiki;
		# Merge changes, since aggregation state may have
		# changed on disk while the aggregation was happening.
		mergestate();
		expire();
		savestate();
		IkiWiki::unlockwiki;
		exit 0;
	}
	waitpid($pid,0);
	if ($?) {
		error "aggregation failed with code $?";
	}

	clearstate();
	unlockaggregate();

	return 1;
}

# Pages with extension _aggregated have plain html markup, pass through.
sub htmlize (@) {
	my %params=@_;
	return $params{content};
}

# Used by ikiwiki-transition aggregateinternal.
sub migrate_to_internal {
	if (! lockaggregate()) {
		error("an aggregation process is currently running");
	}

	IkiWiki::lockwiki();
	loadstate();
	$config{verbose}=1;

	foreach my $data (values %guids) {
		next unless $data->{page};
		next if $data->{expired};

		$config{aggregateinternal} = 0;
		my $oldname = "$config{srcdir}/".htmlfn($data->{page});
		if (! -e $oldname) {
			$oldname = $IkiWiki::Plugin::transient::transientdir."/".htmlfn($data->{page});
		}

		my $oldoutput = $config{destdir}."/".IkiWiki::htmlpage($data->{page});

		$config{aggregateinternal} = 1;
		my $newname = $IkiWiki::Plugin::transient::transientdir."/".htmlfn($data->{page});

		debug "moving $oldname -> $newname";
		if (-e $newname) {
			if (-e $oldname) {
				error("$newname already exists");
			}
			else {
				debug("already renamed to $newname?");
			}
		}
		elsif (-e $oldname) {
			rename($oldname, $newname) || error("$!");
		}
		else {
			debug("$oldname not found");
		}
		if (-e $oldoutput) {
			require IkiWiki::Render;
			debug("removing output file $oldoutput");
			IkiWiki::prune($oldoutput, $config{destdir});
		}
	}

	savestate();
	IkiWiki::unlockwiki;

	unlockaggregate();
}
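
# needsbuild hook: when a page containing aggregate directives is about to
# be rebuilt, mark its feeds as unseen; preprocess() will unmark those that
# still exist, and garbage_collect() drops the rest.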

sub needsbuild (@) {
	my $needsbuild=shift;

	loadstate();

	foreach my $feed (values %feeds) {
		if (exists $pagesources{$feed->{sourcepage}} &&
		    grep { $_ eq $pagesources{$feed->{sourcepage}} } @$needsbuild) {
			# Mark all feeds originating on this page as
			# not yet seen; preprocess will unmark those that
			# still exist.
			markunseen($feed->{sourcepage});
		}
	}

	return $needsbuild;
}
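
# preprocess handles the aggregate directive. Illustrative usage (names and
# URLs are made up):
#   [[!aggregate name="Example Blog" url="https://blog.example.com/"
#   feedurl="https://blog.example.com/index.rss" updateinterval="60"
#   expirecount="25" tag="news"]]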

sub preprocess (@) {
	my %params=@_;

	foreach my $required (qw{name url}) {
		if (! exists $params{$required}) {
			error sprintf(gettext("missing %s parameter"), $required)
		}
	}

	my $feed={};
	my $name=$params{name};
	if (exists $feeds{$name}) {
		$feed=$feeds{$name};
	}
	else {
		$feeds{$name}=$feed;
	}
	$feed->{name}=$name;
	$feed->{sourcepage}=$params{page};
	$feed->{url}=$params{url};
	my $dir=exists $params{dir} ? $params{dir} : $params{page}."/".titlepage($params{name});
	$dir=~s/^\/+//;
	($dir)=$dir=~/$config{wiki_file_regexp}/;
	$feed->{dir}=$dir;
	$feed->{feedurl}=defined $params{feedurl} ? $params{feedurl} : "";
	$feed->{updateinterval}=defined $params{updateinterval} ? $params{updateinterval} * 60 : 15 * 60;
	$feed->{expireage}=defined $params{expireage} ? $params{expireage} : 0;
	$feed->{expirecount}=defined $params{expirecount} ? $params{expirecount} : 0;
	if (exists $params{template}) {
		$params{template}=~s/[^-_a-zA-Z0-9]+//g;
	}
	else {
		$params{template} = "aggregatepost"
	}
	$feed->{template}=$params{template} . ".tmpl";
	delete $feed->{unseen};
	$feed->{lastupdate}=0 unless defined $feed->{lastupdate};
	$feed->{lasttry}=$feed->{lastupdate} unless defined $feed->{lasttry};
	$feed->{numposts}=0 unless defined $feed->{numposts};
	$feed->{newposts}=0 unless defined $feed->{newposts};
	$feed->{message}=gettext("new feed") unless defined $feed->{message};
	$feed->{error}=0 unless defined $feed->{error};
	$feed->{tags}=[];
	while (@_) {
		my $key=shift;
		my $value=shift;
		if ($key eq 'tag') {
			push @{$feed->{tags}}, $value;
		}
	}

	return "<a href=\"".$feed->{url}."\">".$feed->{name}."</a>: ".
		($feed->{error} ? "<em>" : "").$feed->{message}.
		($feed->{error} ? "</em>" : "").
		" (".$feed->{numposts}." ".gettext("posts").
		($feed->{newposts} ? "; ".$feed->{newposts}.
			" ".gettext("new") : "").
		")";
}

sub delete (@) {
	my @files=@_;

	# Remove feed data for removed pages.
	foreach my $file (@files) {
		my $page=pagename($file);
		markunseen($page);
	}
}

sub markunseen ($) {
	my $page=shift;

	foreach my $id (keys %feeds) {
		if ($feeds{$id}->{sourcepage} eq $page) {
			$feeds{$id}->{unseen}=1;
		}
	}
}
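
# On-disk state lives in $config{wikistatedir}/aggregate: one record per
# line as space-separated field=value pairs, with whitespace in the values
# entity-encoded. Feed records carry a "name" field, guid records a "guid"
# field. Illustrative record (values made up):
#   guid=http://example.com/post/1 feed=Example page=example/post_1 md5=0123abcd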

my $state_loaded=0;

sub loadstate () {
	return if $state_loaded;
	$state_loaded=1;
	if (-e "$config{wikistatedir}/aggregate") {
		open(IN, "<", "$config{wikistatedir}/aggregate") ||
			die "$config{wikistatedir}/aggregate: $!";
		while (<IN>) {
			$_=IkiWiki::possibly_foolish_untaint($_);
			chomp;
			my $data={};
			foreach my $i (split(/ /, $_)) {
				my ($field, $val)=split(/=/, $i, 2);
				if ($field eq "name" || $field eq "feed" ||
				    $field eq "guid" || $field eq "message") {
					$data->{$field}=decode_entities($val, " \t\n");
				}
				elsif ($field eq "tag") {
					push @{$data->{tags}}, $val;
				}
				else {
					$data->{$field}=$val;
				}
			}

			if (exists $data->{name}) {
				$feeds{$data->{name}}=$data;
			}
			elsif (exists $data->{guid}) {
				$guids{$data->{guid}}=$data;
			}
		}

		close IN;
	}
}
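
# savestate() is the inverse of loadstate(). It also writes
# $config{wikistatedir}/aggregatetime, containing the earliest
# lastupdate + updateinterval over all feeds, i.e. when the next
# aggregation run will be due.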

sub savestate () {
	return unless $state_loaded;
	garbage_collect();
	my $newfile="$config{wikistatedir}/aggregate.new";
	my $cleanup = sub { unlink($newfile) };
	open (OUT, ">", $newfile) || error("open $newfile: $!", $cleanup);
	foreach my $data (values %feeds, values %guids) {
		my @line;
		foreach my $field (keys %$data) {
			if ($field eq "name" || $field eq "feed" ||
			    $field eq "guid" || $field eq "message") {
				push @line, "$field=".encode_entities($data->{$field}, " \t\n");
			}
			elsif ($field eq "tags") {
				push @line, "tag=$_" foreach @{$data->{tags}};
			}
			else {
				push @line, "$field=".$data->{$field}
					if defined $data->{$field};
			}
		}
		print OUT join(" ", @line)."\n" or error("write $newfile: $!", $cleanup);
	}
	close OUT || error("save $newfile: $!", $cleanup);
	rename($newfile, "$config{wikistatedir}/aggregate") ||
		error("rename $newfile: $!", $cleanup);

	my $timestamp=undef;
	foreach my $feed (keys %feeds) {
		my $t=$feeds{$feed}->{lastupdate}+$feeds{$feed}->{updateinterval};
		if (! defined $timestamp || $timestamp > $t) {
			$timestamp=$t;
		}
	}
	$newfile=~s/\.new$/time/;
	open (OUT, ">", $newfile) || error("open $newfile: $!", $cleanup);
	if (defined $timestamp) {
		print OUT $timestamp."\n";
	}
	close OUT || error("save $newfile: $!", $cleanup);
}

sub garbage_collect () {
	foreach my $name (keys %feeds) {
		# remove any feeds that were not seen while building the pages
		# that used to contain them
		if ($feeds{$name}->{unseen}) {
			delete $feeds{$name};
		}
	}

	foreach my $guid (values %guids) {
		# any guid whose feed is gone should be removed
		if (! exists $feeds{$guid->{feed}}) {
			if (exists $guid->{page}) {
				unlink $IkiWiki::Plugin::transient::transientdir."/".htmlfn($guid->{page})
					|| unlink "$config{srcdir}/".htmlfn($guid->{page});
			}
			delete $guids{$guid->{guid}};
		}
		# handle expired guids
		elsif ($guid->{expired} && exists $guid->{page}) {
			unlink "$config{srcdir}/".htmlfn($guid->{page});
			unlink $IkiWiki::Plugin::transient::transientdir."/".htmlfn($guid->{page});
			delete $guid->{page};
			delete $guid->{md5};
		}
	}
}

sub mergestate () {
	# Load the current state in from disk, and merge into it
	# values from the state in memory that might have changed
	# during aggregation.
	my %myfeeds=%feeds;
	my %myguids=%guids;
	clearstate();
	loadstate();

	# All that can change in feed state during aggregation is a few
	# fields.
	foreach my $name (keys %myfeeds) {
		if (exists $feeds{$name}) {
			foreach my $field (qw{message lastupdate lasttry
			                      numposts newposts error}) {
				$feeds{$name}->{$field}=$myfeeds{$name}->{$field};
			}
		}
	}

	# New guids can be created during aggregation.
	# Guids have a few fields that may be updated during aggregation.
	# It's also possible that guids were removed from the on-disk state
	# while the aggregation was in process. That would only happen if
	# their feed was also removed, so any removed guids added back here
	# will be garbage collected later.
	foreach my $guid (keys %myguids) {
		if (! exists $guids{$guid}) {
			$guids{$guid}=$myguids{$guid};
		}
		else {
			foreach my $field (qw{md5}) {
				$guids{$guid}->{$field}=$myguids{$guid}->{$field};
			}
		}
	}
}

sub clearstate () {
	%feeds=();
	%guids=();
	$state_loaded=0;
}
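
# Expiry policy: expireage is a maximum age in days, expirecount a maximum
# number of posts to keep per feed. Posts are walked newest first; anything
# over either limit is only marked expired here, and garbage_collect()
# removes the underlying pages.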

sub expire () {
	foreach my $feed (values %feeds) {
		next unless $feed->{expireage} || $feed->{expirecount};
		my $count=0;
		my %seen;
		foreach my $item (sort { ($IkiWiki::pagectime{$b->{page}} || 0) <=> ($IkiWiki::pagectime{$a->{page}} || 0) }
		                  grep { exists $_->{page} && $_->{feed} eq $feed->{name} }
		                  values %guids) {
			if ($feed->{expireage}) {
				my $days_old = (time - ($IkiWiki::pagectime{$item->{page}} || 0)) / 60 / 60 / 24;
				if ($days_old > $feed->{expireage}) {
					debug(sprintf(gettext("expiring %s (%s days old)"),
						$item->{page}, int($days_old)));
					$item->{expired}=1;
				}
			}
			elsif ($feed->{expirecount} &&
			       $count >= $feed->{expirecount}) {
				debug(sprintf(gettext("expiring %s"), $item->{page}));
				$item->{expired}=1;
			}
			else {
				if (! $seen{$item->{page}}) {
					$seen{$item->{page}}=1;
					$count++;
				}
			}
		}
	}
}

sub needsaggregate () {
	return values %feeds if $config{rebuild};
	return grep { time - $_->{lastupdate} >= $_->{updateinterval} } values %feeds;
}
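
# aggregate() does the network-facing work, normally in the child process
# forked by launchaggregation(). It relies on XML::Feed and URI::Fetch,
# plus Net::INET6Glue::INET_is_INET6 for IPv6 when available.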

sub aggregate (@) {
	eval q{use Net::INET6Glue::INET_is_INET6}; # may not be available
	eval q{use XML::Feed};
	error($@) if $@;
	eval q{use URI::Fetch};
	error($@) if $@;

	foreach my $feed (@_) {
		$feed->{lasttry}=time;
		$feed->{newposts}=0;
		$feed->{message}=sprintf(gettext("last checked %s"),
			displaytime($feed->{lasttry}));
		$feed->{error}=0;

		debug(sprintf(gettext("checking feed %s ..."), $feed->{name}));

		if (! length $feed->{feedurl}) {
			my @urls=XML::Feed->find_feeds($feed->{url});
			if (! @urls) {
				$feed->{message}=sprintf(gettext("could not find feed at %s"), $feed->{url});
				$feed->{error}=1;
				debug($feed->{message});
				next;
			}
			$feed->{feedurl}=pop @urls;
		}
		# Using the for_url parameter makes sure we crash if used
		# with an older IkiWiki.pm that didn't automatically try
		# to use LWPx::ParanoidAgent.
		my $ua=useragent(for_url => $feed->{feedurl});
		my $res=URI::Fetch->fetch($feed->{feedurl}, UserAgent=>$ua);
		if (! $res) {
			$feed->{message}=URI::Fetch->errstr;
			$feed->{error}=1;
			debug($feed->{message});
			next;
		}

		# lastupdate is only set if we were able to contact the server
		$feed->{lastupdate}=$feed->{lasttry};

		if ($res->status == URI::Fetch::URI_GONE()) {
			$feed->{message}=gettext("feed not found");
			$feed->{error}=1;
			debug($feed->{message});
			next;
		}
		my $content=$res->content;

		# This is a hack to support the media:content extension
		# to RSS. XML::Feed does not support it, but it's the same
		# as an enclosure, so converting it to that tag will let it
		# parse.
		$content=~s/<media:content/<enclosure/g;
		$content=~s/<\/media:content/<\/enclosure/g;

		my $f=eval{XML::Feed->parse(\$content)};
		if ($@) {
			# One common cause of XML::Feed crashing is a feed
			# that contains invalid UTF-8 sequences. Convert
			# feed to ascii to try to work around.
			$feed->{message}.=" ".sprintf(gettext("(invalid UTF-8 stripped from feed)"));
			$f=eval {
				$content=Encode::decode_utf8($content, 0);
				XML::Feed->parse(\$content)
			};
		}
		if ($@) {
			# Another possibility is badly escaped entities.
			$feed->{message}.=" ".sprintf(gettext("(feed entities escaped)"));
			$content=~s/\&(?!amp)(\w+);/&amp;$1;/g;
			$f=eval {
				$content=Encode::decode_utf8($content, 0);
				XML::Feed->parse(\$content)
			};
		}
		if ($@) {
			# gettext can clobber $@
			my $error = $@;
			$feed->{message}=gettext("feed crashed XML::Feed!")." ($error)";
			$feed->{error}=1;
			debug($feed->{message});
			next;
		}
		if (! $f) {
			$feed->{message}=XML::Feed->errstr;
			$feed->{error}=1;
			debug($feed->{message});
			next;
		}

		foreach my $entry ($f->entries) {
			# XML::Feed doesn't work around XML::Atom's bizarre
			# API, so we will. Real unicode strings? Yes please.
			# See [[bugs/Aggregated_Atom_feeds_are_double-encoded]]
			no warnings 'once';
			local $XML::Atom::ForceUnicode = 1;
			use warnings;

			my $c=$entry->content;
			# atom feeds may have no content, only a summary
			if (! defined $c && ref $entry->summary) {
				$c=$entry->summary;
			}

			add_page(
				feed => $feed,
				copyright => $f->copyright,
				title => defined $entry->title ? decode_entities($entry->title) : "untitled",
				author => defined $entry->author ? decode_entities($entry->author) : "",
				link => $entry->link,
				enclosureurl => defined $entry->enclosure ? $entry->enclosure->url : "",
				enclosureimage => (defined $entry->enclosure && $entry->enclosure->type =~ m/image\//) ? "1" : "",
				enclosureaudio => (defined $entry->enclosure && $entry->enclosure->type =~ m/audio\//) ? "1" : "",
				enclosurevideo => (defined $entry->enclosure && $entry->enclosure->type =~ m/video\//) ? "1" : "",
				content => (defined $c && defined $c->body) ? $c->body : "",
				guid => defined $entry->id ? $entry->id : time."_".$feed->{name},
				ctime => $entry->issued ? ($entry->issued->epoch || time) : time,
				base => (defined $c && $c->can("base")) ? $c->base : undef,
			);
		}
	}
}

sub add_page (@) {
	my %params=@_;

	my $feed=$params{feed};
	my $guid={};
	my $mtime;
	if (exists $guids{$params{guid}}) {
		# updating an existing post
		$guid=$guids{$params{guid}};
		return if $guid->{expired};
		write_page($feed, $guid, $mtime, \%params);
	}
	else {
		# new post
		$guid->{guid}=$params{guid};
		$guids{$params{guid}}=$guid;
		$mtime=$params{ctime};
		$feed->{numposts}++;
		$feed->{newposts}++;

		# assign it an unused page
		my $page=titlepage($params{title});
		# escape slashes and periods in title so it doesn't specify
		# directory name or trigger ".." disallowing code.
		$page=~s!([/.])!"__".ord($1)."__"!eg;
		if (! defined $page || ! length $page) {
			$page=$feed->{dir}."/item";
		}
		$page=$feed->{dir}."/".$page;
		($page)=$page=~/$config{wiki_file_regexp}/;
		my $c="";
		while (exists $IkiWiki::pagecase{lc $page.$c} ||
		       -e $IkiWiki::Plugin::transient::transientdir."/".htmlfn($page.$c) ||
		       -e "$config{srcdir}/".htmlfn($page.$c)) {
			$c++
		}
		$page=$page.$c;

		$guid->{page}=$page;
		eval { write_page($feed, $guid, $mtime, \%params) };
		if ($@) {
			# assume failure was due to a too long filename
			$c="";
			$page=$feed->{dir}."/item";
			while (exists $IkiWiki::pagecase{lc $page.$c} ||
			       -e $IkiWiki::Plugin::transient::transientdir."/".htmlfn($page.$c) ||
			       -e "$config{srcdir}/".htmlfn($page.$c)) {
				$c++
			}
			$page=$page.$c;

			$guid->{page}=$page;
			write_page($feed, $guid, $mtime, \%params);
		}

		debug(sprintf(gettext("creating new page %s"), $page));
	}
}
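
# write_page() renders one aggregated post through the feed's template
# (aggregatepost.tmpl by default), passing title, author, content, name,
# url, copyright, permalink, tag and enclosure* variables, and writes the
# result into the transient underlay.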

sub write_page ($$$$$) {
	my $feed=shift;
	my $guid=shift;
	my $mtime=shift;
	my %params=%{shift()};

	$guid->{feed}=$feed->{name};

	# To write or not to write? Need to avoid writing unchanged pages
	# to avoid unnecessary rebuilding. The mtime from rss cannot be
	# trusted; let's use a digest.
	eval q{use Digest::MD5 'md5_hex'};
	error($@) if $@;
	require Encode;
	my $digest=md5_hex(Encode::encode_utf8($params{content}));
	return unless ! exists $guid->{md5} || $guid->{md5} ne $digest || $config{rebuild};
	$guid->{md5}=$digest;

	# Create the page.
	my $template;
	eval {
		$template=template($feed->{template}, blind_cache => 1);
	};
	if ($@) {
		# gettext can clobber $@
		my $error = $@;
		print STDERR gettext("failed to process template:")." $error";
		return;
	}
	$template->param(title => $params{title})
		if defined $params{title} && length($params{title});
	$template->param(author => $params{author})
		if defined $params{author} && length($params{author})
			&& $params{author} ne $feed->{name};
	$template->param(content => wikiescape(htmlabs($params{content},
		defined $params{base} ? $params{base} : $feed->{feedurl})));
	$template->param(name => $feed->{name});
	$template->param(url => $feed->{url});
	$template->param(copyright => $params{copyright})
		if defined $params{copyright} && length $params{copyright};
	$template->param(permalink => IkiWiki::urlabs($params{link}, $feed->{feedurl}))
		if defined $params{link};
	$template->param(enclosureurl => $params{enclosureurl})
		if defined $params{enclosureurl} && length $params{enclosureurl};
	$template->param(enclosureimage => $params{enclosureimage})
		if defined $params{enclosureimage} && length $params{enclosureimage};
	$template->param(enclosureaudio => $params{enclosureaudio})
		if defined $params{enclosureaudio} && length $params{enclosureaudio};
	$template->param(enclosurevideo => $params{enclosurevideo})
		if defined $params{enclosurevideo} && length $params{enclosurevideo};
	if (ref $feed->{tags}) {
		$template->param(tags => [map { tag => $_ }, @{$feed->{tags}}]);
	}
	writefile(htmlfn($guid->{page}),
		$IkiWiki::Plugin::transient::transientdir, $template->output);

	if (defined $mtime && $mtime <= time) {
		# Set the mtime, this lets the build process get the right
		# creation time on record for the new page.
		utime $mtime, $mtime,
			$IkiWiki::Plugin::transient::transientdir."/".htmlfn($guid->{page});
		# Store it in pagectime for expiry code to use also.
		$IkiWiki::pagectime{$guid->{page}}=$mtime
			unless exists $IkiWiki::pagectime{$guid->{page}};
	}
	else {
		# Dummy value for expiry code.
		$IkiWiki::pagectime{$guid->{page}}=time
			unless exists $IkiWiki::pagectime{$guid->{page}};
	}
}

sub wikiescape ($) {
	# escape accidental wikilinks and preprocessor stuff
	return encode_entities(shift, '\[\]');
}

sub htmlabs ($$) {
	# Convert links in html from relative to absolute.
	# Note that this is a heuristic, which is not specified by the rss
	# spec and may not be right for all feeds. Also, see Debian
	# bug #381359.
	my $html=shift;
	my $urlbase=shift;

	my $ret="";
	my $p = HTML::Parser->new(api_version => 3);
	$p->handler(default => sub { $ret.=join("", @_) }, "text");
	$p->handler(start => sub {
		my ($tagname, $pos, $text) = @_;
		if (ref $HTML::Tagset::linkElements{$tagname}) {
			while (4 <= @$pos) {
				# use attribute sets from right to left
				# to avoid invalidating the offsets
				# when replacing the values
				my($k_offset, $k_len, $v_offset, $v_len) =
					splice(@$pos, -4);
				my $attrname = lc(substr($text, $k_offset, $k_len));
				next unless grep { $_ eq $attrname } @{$HTML::Tagset::linkElements{$tagname}};
				next unless $v_offset; # 0 v_offset means no value
				my $v = substr($text, $v_offset, $v_len);
				$v =~ s/^([\'\"])(.*)\1$/$2/;
				my $new_v=IkiWiki::urlabs($v, $urlbase);
				$new_v =~ s/\"/&quot;/g; # since we quote with ""
				substr($text, $v_offset, $v_len) = qq("$new_v");
			}
		}
		$ret.=$text;
	}, "tagname, tokenpos, text");
	$p->parse($html);
	$p->eof;

	return $ret;
}

sub htmlfn ($) {
	return shift().".".($config{aggregateinternal} ? "_aggregated" : $config{htmlext});
}
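
# A separate lock file, $config{wikistatedir}/aggregatelock, serializes
# aggregation runs without holding the main wiki lock for the whole
# network-bound run.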

my $aggregatelock;

sub lockaggregate () {
	# Take an exclusive lock to prevent multiple concurrent aggregators.
	# Returns true if the lock was acquired.
	if (! -d $config{wikistatedir}) {
		mkdir($config{wikistatedir});
	}
	open($aggregatelock, '>', "$config{wikistatedir}/aggregatelock") ||
		error("cannot open $config{wikistatedir}/aggregatelock: $!");
	if (! flock($aggregatelock, 2 | 4)) { # LOCK_EX | LOCK_NB
		close($aggregatelock) || error("failed closing aggregatelock: $!");
		return 0;
	}
	return 1;
}

sub unlockaggregate () {
	return close($aggregatelock) if $aggregatelock;
	return;
}

1