2006-07-30 06:31:08 +02:00
|
|
|
#!/usr/bin/perl
|
|
|
|
# Blog aggregation plugin.
|
|
|
|
package IkiWiki::Plugin::aggregate;
|
|
|
|
|
|
|
|
use warnings;
|
|
|
|
use strict;
|
2007-04-27 04:55:52 +02:00
|
|
|
use IkiWiki 2.00;
|
2006-08-02 02:52:47 +02:00
|
|
|
use HTML::Entities;
|
2006-08-03 23:50:47 +02:00
|
|
|
use HTML::Parser;
|
|
|
|
use HTML::Tagset;
|
|
|
|
use URI;
|
2006-11-08 21:13:59 +01:00
|
|
|
use open qw{:utf8 :std};
|
2006-07-30 06:31:08 +02:00
|
|
|
|
|
|
|
# Feed records (hash refs), keyed by feed name.
my %feeds;
|
|
|
|
# Aggregated item records (hash refs), keyed by the item's guid.
my %guids;
|
|
|
|
|
|
|
|
sub import { #{{{
    # Register every hook this plugin provides under the id "aggregate".
    # Hooks are registered in the order they are listed here.
    my @handlers=(
        [getopt      => \&getopt],
        [checkconfig => \&checkconfig],
        [needsbuild  => \&needsbuild],
        [preprocess  => \&preprocess],
        [delete      => \&delete],
        [savestate   => \&savestate],
    );
    foreach my $handler (@handlers) {
        my ($type, $call)=@$handler;
        hook(type => $type, id => "aggregate", call => $call);
    }
} # }}}
|
|
|
|
|
|
|
|
sub getopt () { #{{{
    # Parse the --aggregate command line switch into $config{aggregate}.
    # Getopt::Long is loaded at run time; a load failure is reported
    # through error().
    eval q{use Getopt::Long};
    if ($@) {
        error($@);
    }
    Getopt::Long::Configure('pass_through');
    Getopt::Long::GetOptions("aggregate" => \$config{aggregate});
} #}}}
|
|
|
|
|
|
|
|
sub checkconfig () { #{{{
    # Run a full aggregation pass when --aggregate was requested, except
    # when running as a post-commit hook that is currently enabled.
    return unless $config{aggregate};
    return if $config{post_commit} && IkiWiki::commit_hook_enabled();

    # Give up right away if the wiki lock cannot be taken.
    unless (IkiWiki::lockwiki(0)) {
        debug("wiki is locked by another process, not aggregating");
        exit 1;
    }

    # The order matters: state and index are loaded before aggregating,
    # expiry runs on the freshly aggregated items, and the in-memory state
    # is only cleared once it has been written out.
    loadstate();
    IkiWiki::loadindex();
    aggregate();
    expire();
    savestate();
    clearstate();

    IkiWiki::unlockwiki();
} #}}}
|
|
|
|
|
* meta: Drop support for "meta link", since supporting this for internal
links required meta to be run during scan, which complicated its data
storage, since it had to clear data stored during the scan pass to avoid
duplicating it during the normal preprocessing pass.
* If you used "meta link", you should switch to either "meta openid" (for
openid delegations), or tags (for internal, invisible links). I assume
that nobody really used "meta link" for external, non-openid links, since
the htmlscrubber ate those. (Tell me differently and I'll consider bringing
back that support.)
* meta: Improved data storage.
* meta: Drop the hackish filter hook that was used to clear
stored data before preprocessing, this hack was ugly, and broken (cf:
liw's disappearing openids).
* aggregate: Convert filter hook to a needsbuild hook.
2007-12-16 21:56:09 +01:00
|
|
|
sub needsbuild (@) { #{{{
    # Takes a reference to the list of source files that need building.
    # Every feed whose source page is about to be rebuilt is marked as
    # removable; preprocess will unmark those that still exist.
    my ($changed)=@_;

    loadstate(); # does nothing if the state is already loaded

    foreach my $feed (values %feeds) {
        my $source=$feed->{sourcepage};
        next unless exists $pagesources{$source};
        my $file=$pagesources{$source};
        next unless grep { $_ eq $file } @$changed;
        remove_feeds($source);
    }
} # }}}
|
|
|
|
|
|
|
|
sub preprocess (@) { #{{{
    # Handle one aggregate directive: create or refresh the feed record
    # named by the "name" parameter and return an HTML status line for it.
    # "name" and "url" are required; every "tag" parameter adds a tag.
    my %params=@_;

    foreach my $required (qw{name url}) {
        next if exists $params{$required};
        return "[[aggregate ".sprintf(gettext("missing %s parameter"), $required)."]]";
    }

    # Reuse the record of a known feed so its counters and status survive.
    my $name=$params{name};
    $feeds{$name}={} unless exists $feeds{$name};
    my $feed=$feeds{$name};

    $feed->{name}=$name;
    $feed->{sourcepage}=$params{page};
    $feed->{url}=$params{url};

    # Items go under an explicit dir, or under a subpage named after the
    # feed; the result is passed through the wiki file name regexp.
    my $dir;
    if (exists $params{dir}) {
        $dir=$params{dir};
    }
    else {
        $dir=$params{page}."/".IkiWiki::titlepage($params{name});
    }
    $dir=~s/^\/+//;
    ($dir)=$dir=~/$config{wiki_file_regexp}/;
    $feed->{dir}=$dir;

    $feed->{feedurl}="";
    $feed->{feedurl}=$params{feedurl} if defined $params{feedurl};
    # updateinterval is given in minutes and stored in seconds
    $feed->{updateinterval}=15 * 60;
    $feed->{updateinterval}=$params{updateinterval} * 60
        if defined $params{updateinterval};
    foreach my $limit (qw{expireage expirecount}) {
        $feed->{$limit}=defined $params{$limit} ? $params{$limit} : 0;
    }

    # The directive still exists, so the feed must not be removed/expired.
    delete $feed->{remove};
    delete $feed->{expired};

    # Defaults for a feed that has never been processed.
    foreach my $counter (qw{lastupdate numposts newposts error}) {
        $feed->{$counter}=0 unless defined $feed->{$counter};
    }
    $feed->{message}=gettext("new feed") unless defined $feed->{message};

    # "tag" may be repeated, so walk the raw key/value pairs rather than
    # the %params hash, which only keeps the last one.
    $feed->{tags}=[];
    for (my $i=0; $i < @_; $i+=2) {
        push @{$feed->{tags}}, $_[$i+1] if $_[$i] eq 'tag';
    }

    my $status=$feed->{message};
    $status="<em>".$status."</em>" if $feed->{error};
    my $counts=$feed->{numposts}." ".gettext("posts");
    $counts.="; ".$feed->{newposts}." ".gettext("new") if $feed->{newposts};

    return "<a href=\"".$feed->{url}."\">".$feed->{name}."</a>: ".
        $status." (".$counts.")";
} # }}}
|
|
|
|
|
|
|
|
sub delete (@) { #{{{
    # Takes the list of deleted source files and marks the feeds defined
    # on the corresponding pages for removal.
    foreach my $removed (@_) {
        remove_feeds(pagename($removed));
    }
} #}}}
|
|
|
|
|
2007-05-21 04:52:51 +02:00
|
|
|
# True once loadstate has run; reset by clearstate.
my $state_loaded=0;

sub loadstate () { #{{{
    # Read $config{wikistatedir}/aggregate into %feeds and %guids.
    #
    # The file holds one record per line, made of space separated
    # field=value tokens.  The name, feed, guid and message values are
    # entity-decoded, repeated "tag" tokens are collected into the tags
    # list, and everything else is stored as is.  A record with a name is
    # a feed; otherwise a record with a guid is an aggregated item.
    #
    # Does nothing when the state is already loaded or the file does not
    # exist.  Dies if the file exists but cannot be opened.
    return if $state_loaded;
    $state_loaded=1;

    my $statefile="$config{wikistatedir}/aggregate";
    return unless -e $statefile;

    # A lexical handle and a named line variable keep this sub from
    # clobbering a global filehandle or the caller's $_.
    open(my $in, "<", $statefile) || die "$statefile: $!";
    while (my $line=<$in>) {
        $line=IkiWiki::possibly_foolish_untaint($line);
        chomp $line;
        my $data={};
        foreach my $token (split(/ /, $line)) {
            # Consecutive spaces produce empty tokens with no field name.
            next unless length $token;
            my ($field, $val)=split(/=/, $token, 2);
            if ($field eq "name" || $field eq "feed" ||
                $field eq "guid" || $field eq "message") {
                # same argument list as the encode_entities call in savestate
                $data->{$field}=decode_entities($val, " \t\n");
            }
            elsif ($field eq "tag") {
                push @{$data->{tags}}, $val;
            }
            else {
                $data->{$field}=$val;
            }
        }

        if (exists $data->{name}) {
            $feeds{$data->{name}}=$data;
        }
        elsif (exists $data->{guid}) {
            $guids{$data->{guid}}=$data;
        }
    }

    close $in;
} #}}}
|
|
|
|
|
|
|
|
sub savestate () { #{{{
    # Write %feeds and %guids to $config{wikistatedir}/aggregate.
    #
    # Does nothing unless loadstate has run.  Records are written to
    # aggregate.new first and renamed into place, so a failed write leaves
    # the old state file alone; the temporary file is unlinked on error.
    #
    # Feeds marked for removal are dropped along with all their items, and
    # the page files of removed or expired items are unlinked.
    return unless $state_loaded;
    eval q{use HTML::Entities};
    error($@) if $@;
    my $newfile="$config{wikistatedir}/aggregate.new";
    my $cleanup = sub { unlink($newfile) };
    open(my $out, ">", $newfile) || error("open $newfile: $!", $cleanup);
    # Feeds come first in this list, so a removed feed has marked its
    # items before those items are reached.
    foreach my $data (values %feeds, values %guids) {
        if ($data->{remove}) {
            if ($data->{name}) {
                # a removed feed takes all of its items with it
                foreach my $guid (values %guids) {
                    if ($guid->{feed} eq $data->{name}) {
                        $guid->{remove}=1;
                    }
                }
            }
            else {
                unlink pagefile($data->{page})
                    if exists $data->{page};
            }
            next;
        }
        elsif ($data->{expired} && exists $data->{page}) {
            # The record of an expired item is still written below, only
            # without its page and md5.
            unlink pagefile($data->{page});
            delete $data->{page};
            delete $data->{md5};
        }

        my @line;
        foreach my $field (keys %$data) {
            if ($field eq "name" || $field eq "feed" ||
                $field eq "guid" || $field eq "message") {
                # escape the characters that delimit tokens and records
                push @line, "$field=".encode_entities($data->{$field}, " \t\n");
            }
            elsif ($field eq "tags") {
                push @line, "tag=$_" foreach @{$data->{tags}};
            }
            else {
                push @line, "$field=".$data->{$field};
            }
        }
        # "or" rather than "||": with "||" the error call attaches to the
        # always-true string being printed and can never run.
        print {$out} join(" ", @line)."\n"
            or error("write $newfile: $!", $cleanup);
    }
    close($out) || error("save $newfile: $!", $cleanup);
    rename($newfile, "$config{wikistatedir}/aggregate") ||
        error("rename $newfile: $!", $cleanup);
} #}}}
|
|
|
|
|
2008-02-03 09:04:19 +01:00
|
|
|
sub clearstate () { #{{{
    # Drop the in-memory feed and item records and mark the state as not
    # loaded, so that the next loadstate call reads the state file again.
    %$_=() foreach (\%feeds, \%guids);
    $state_loaded=0;
} #}}}
|
|
|
|
|
2006-11-01 06:41:37 +01:00
|
|
|
sub expire () { #{{{
    # Flag items as expired according to each feed's expireage (in days)
    # and expirecount settings.  Items are only flagged here; savestate
    # unlinks their page files.
    #
    # When a feed has an expireage, only the age test is applied to its
    # items and expirecount is not consulted.
    foreach my $feed (values %feeds) {
        next if ! $feed->{expireage} && ! $feed->{expirecount};

        # Items of this feed that have a page with a known creation time.
        my @candidates=grep {
            exists $_->{page} &&
            $_->{feed} eq $feed->{name} &&
            $IkiWiki::pagectime{$_->{page}}
        } values %guids;
        my @newest_first=sort {
            $IkiWiki::pagectime{$b->{page}} <=> $IkiWiki::pagectime{$a->{page}}
        } @candidates;

        my $kept=0;
        my %counted;
        foreach my $item (@newest_first) {
            my $page=$item->{page};
            if ($feed->{expireage}) {
                my $days_old = (time - $IkiWiki::pagectime{$page}) / 60 / 60 / 24;
                next unless $days_old > $feed->{expireage};
                debug(sprintf(gettext("expiring %s (%s days old)"),
                    $page, int($days_old)));
                $item->{expired}=1;
            }
            elsif ($feed->{expirecount} && $kept >= $feed->{expirecount}) {
                debug(sprintf(gettext("expiring %s"), $page));
                $item->{expired}=1;
            }
            elsif (! $counted{$page}) {
                # each distinct page counts once towards expirecount
                $counted{$page}=1;
                $kept++;
            }
        }
    }
} #}}}
|
|
|
|
|
2008-02-03 09:04:19 +01:00
|
|
|
sub aggregate () { #{{{
    # Fetch every feed that is due for an update and hand each of its
    # entries to add_page.
    #
    # A feed is due when $config{rebuild} is set, or when at least
    # updateinterval seconds have passed since its lastupdate.  The outcome
    # of each attempt is recorded in the feed's message and error fields,
    # and the feed's source page is flagged in %IkiWiki::forcerebuild.
    eval q{use XML::Feed};
    error($@) if $@;
    eval q{use URI::Fetch};
    error($@) if $@;
    eval q{use HTML::Entities};
    error($@) if $@;
    # The recovery paths below call Encode::decode_utf8 directly.
    require Encode;

    foreach my $feed (values %feeds) {
        next unless $config{rebuild} ||
            time - $feed->{lastupdate} >= $feed->{updateinterval};
        $feed->{lastupdate}=time;
        $feed->{newposts}=0;
        $feed->{message}=sprintf(gettext("processed ok at %s"),
            displaytime($feed->{lastupdate}));
        $feed->{error}=0;
        $IkiWiki::forcerebuild{$feed->{sourcepage}}=1;

        debug(sprintf(gettext("checking feed %s ..."), $feed->{name}));

        # No explicit feed url: look for one at the site's url.
        if (! length $feed->{feedurl}) {
            my @urls=XML::Feed->find_feeds($feed->{url});
            if (! @urls) {
                $feed->{message}=sprintf(gettext("could not find feed at %s"), $feed->{url});
                $feed->{error}=1;
                debug($feed->{message});
                next;
            }
            $feed->{feedurl}=pop @urls;
        }
        my $res=URI::Fetch->fetch($feed->{feedurl});
        if (! $res) {
            $feed->{message}=URI::Fetch->errstr;
            $feed->{error}=1;
            debug($feed->{message});
            next;
        }
        if ($res->status == URI::Fetch::URI_GONE()) {
            $feed->{message}=gettext("feed not found");
            $feed->{error}=1;
            debug($feed->{message});
            next;
        }
        my $content=$res->content;
        my $f=eval{XML::Feed->parse(\$content)};
        if ($@) {
            # One common cause of XML::Feed crashing is a feed
            # that contains invalid UTF-8 sequences. Convert
            # feed to ascii to try to work around.
            $feed->{message}.=" ".sprintf(gettext("(invalid UTF-8 stripped from feed)"));
            $content=Encode::decode_utf8($content);
            $f=eval{XML::Feed->parse(\$content)};
        }
        if ($@) {
            # Another possibility is badly escaped entities.
            # Escape the ampersand of every named entity other
            # than &amp; itself.
            $feed->{message}.=" ".sprintf(gettext("(feed entities escaped)"));
            $content=~s/\&(?!amp)(\w+);/&amp;$1;/g;
            $content=Encode::decode_utf8($content);
            $f=eval{XML::Feed->parse(\$content)};
        }
        if ($@) {
            $feed->{message}=gettext("feed crashed XML::Feed!")." ($@)";
            $feed->{error}=1;
            debug($feed->{message});
            next;
        }
        if (! $f) {
            $feed->{message}=XML::Feed->errstr;
            $feed->{error}=1;
            debug($feed->{message});
            next;
        }

        foreach my $entry ($f->entries) {
            add_page(
                feed => $feed,
                copyright => $f->copyright,
                title => defined $entry->title ? decode_entities($entry->title) : "untitled",
                link => $entry->link,
                content => defined $entry->content->body ? $entry->content->body : "",
                # $feed is a plain hash ref, so its name is read
                # with ->{name}; a method call would die here.
                guid => defined $entry->id ? $entry->id : time."_".$feed->{name},
                ctime => $entry->issued ? ($entry->issued->epoch || time) : time,
            );
        }
    }
} #}}}
|
|
|
|
|
|
|
|
sub add_page (@) { #{{{
    # Create or update the wiki page for one feed entry.
    #
    # Named parameters: feed (the feed's record from %feeds), guid, title,
    # link, content, copyright and ctime.
    #
    # A guid seen for the first time gets a record in %guids and a page
    # name that is not in use yet.  The page file is only (re)written when
    # the content digest changed or $config{rebuild} is set.  Entries that
    # were expired earlier are skipped.
    my %params=@_;

    my $feed=$params{feed};
    my $guid={};
    my $mtime;
    if (exists $guids{$params{guid}}) {
        # updating an existing post
        $guid=$guids{$params{guid}};
        return if $guid->{expired};
    }
    else {
        # new post
        $guid->{guid}=$params{guid};
        $guids{$params{guid}}=$guid;
        $mtime=$params{ctime};
        $feed->{numposts}++;
        $feed->{newposts}++;

        # Returns the given page name, with the first numeric suffix
        # appended that yields a name no known page or existing file uses
        # (no suffix at all if the plain name is free).
        my $unused_page=sub {
            my $base=shift;
            my $c="";
            while (exists $IkiWiki::pagecase{lc $base.$c} ||
                   -e pagefile($base.$c)) {
                $c++
            }
            return $base.$c;
        };

        # assign it an unused page
        my $page=IkiWiki::titlepage($params{title});
        # escape slashes and periods in title so it doesn't specify
        # directory name or trigger ".." disallowing code.
        $page=~s!([/.])!"__".ord($1)."__"!eg;
        $page=$feed->{dir}."/".$page;
        ($page)=$page=~/$config{wiki_file_regexp}/;
        if (! defined $page || ! length $page) {
            $page=$feed->{dir}."/item";
        }
        # The suffix has to become part of the page name; otherwise a
        # second post with the same title would reuse the first one's
        # page.
        $page=$unused_page->($page);

        # Make sure that the file name isn't too long.
        # NB: This doesn't check for path length limits.
        my $max=POSIX::pathconf($config{srcdir}, &POSIX::_PC_NAME_MAX);
        if (defined $max && length(htmlfn($page)) >= $max) {
            $page=$unused_page->($feed->{dir}."/item");
        }

        $guid->{page}=$page;
        debug(sprintf(gettext("creating new page %s"), $page));
    }
    $guid->{feed}=$feed->{name};

    # To write or not to write? Need to avoid writing unchanged pages
    # to avoid unnecessary rebuilding. The mtime from rss cannot be
    # trusted; let's use a digest.
    eval q{use Digest::MD5 'md5_hex'};
    error($@) if $@;
    require Encode;
    my $digest=md5_hex(Encode::encode_utf8($params{content}));
    return unless ! exists $guid->{md5} || $guid->{md5} ne $digest || $config{rebuild};
    $guid->{md5}=$digest;

    # Create the page.
    my $template=template("aggregatepost.tmpl", blind_cache => 1);
    $template->param(title => $params{title})
        if defined $params{title} && length($params{title});
    $template->param(content => htmlescape(htmlabs($params{content}, $feed->{feedurl})));
    $template->param(name => $feed->{name});
    $template->param(url => $feed->{url});
    $template->param(copyright => $params{copyright})
        if defined $params{copyright} && length $params{copyright};
    $template->param(permalink => urlabs($params{link}, $feed->{feedurl}))
        if defined $params{link};
    if (ref $feed->{tags}) {
        $template->param(tags => [map { tag => $_ }, @{$feed->{tags}}]);
    }
    writefile(htmlfn($guid->{page}), $config{srcdir},
        $template->output);

    # Set the mtime, this lets the build process get the right creation
    # time on record for the new page.  $mtime is only set for new posts,
    # and times in the future are not applied.
    utime $mtime, $mtime, pagefile($guid->{page})
        if defined $mtime && $mtime <= time;
} #}}}
|
|
|
|
|
2006-08-03 23:50:47 +02:00
|
|
|
sub htmlescape ($) { #{{{
    # Returns a copy of the given html in which every "[[" that is not
    # already preceded by a backslash has one put in front of it, so feed
    # content cannot accidentally form wikilinks or preprocessor
    # directives.
    my ($escaped)=@_;
    $escaped=~s/(?<!\\)\[\[/\\\[\[/g;
    return $escaped;
} #}}}
|
|
|
|
|
|
|
|
sub urlabs ($$) { #{{{
    # Returns $url as a string, made absolute relative to $urlbase by
    # URI->new_abs.
    my ($url, $urlbase)=@_;

    my $absolute=URI->new_abs($url, $urlbase);
    $absolute->as_string;
} #}}}
|
|
|
|
|
|
|
|
sub htmlabs ($$) { #{{{
    # Convert links in html from relative to absolute.
    # Note that this is a heuristic, which is not specified by the rss
    # spec and may not be right for all feeds. Also, see Debian
    # bug #381359.
    #
    # Takes the html and the base url.  Returns the html with the link
    # attributes (as listed in %HTML::Tagset::linkElements) of every start
    # tag rewritten through urlabs; everything else is passed through
    # unchanged.
    my $html=shift;
    my $urlbase=shift;

    my $ret="";
    my $p = HTML::Parser->new(api_version => 3);
    # anything that is not a start tag is copied verbatim
    $p->handler(default => sub { $ret.=join("", @_) }, "text");
    $p->handler(start => sub {
        my ($tagname, $pos, $text) = @_;
        if (ref $HTML::Tagset::linkElements{$tagname}) {
            while (4 <= @$pos) {
                # use attribute sets from right to left
                # to avoid invalidating the offsets
                # when replacing the values
                my($k_offset, $k_len, $v_offset, $v_len) =
                    splice(@$pos, -4);
                my $attrname = lc(substr($text, $k_offset, $k_len));
                next unless grep { $_ eq $attrname } @{$HTML::Tagset::linkElements{$tagname}};
                next unless $v_offset; # 0 v_offset means no value
                my $v = substr($text, $v_offset, $v_len);
                $v =~ s/^([\'\"])(.*)\1$/$2/;
                my $new_v=urlabs($v, $urlbase);
                # The value is re-quoted with "" below, so a double quote
                # inside it must be written as an entity or it would end
                # the attribute early.
                $new_v =~ s/\"/&quot;/g;
                substr($text, $v_offset, $v_len) = qq("$new_v");
            }
        }
        $ret.=$text;
    }, "tagname, tokenpos, text");
    $p->parse($html);
    $p->eof;

    return $ret;
} #}}}
|
|
|
|
|
2006-07-30 06:31:08 +02:00
|
|
|
sub remove_feeds ($) { #{{{
    # Mark every feed that was defined on the given page as removable.
    # savestate drops marked feeds together with their items, unless
    # preprocess clears the mark first.
    #
    # The prototype takes one scalar, matching how this sub is called.
    my $page=shift;

    foreach my $feed (values %feeds) {
        # a record without a source page cannot belong to any page
        next unless defined $feed->{sourcepage};
        $feed->{remove}=1 if $feed->{sourcepage} eq $page;
    }
} #}}}
|
|
|
|
|
|
|
|
sub pagefile ($) { #{{{
    # Returns the path of the given page's html file under $config{srcdir}.
    my ($page)=@_;

    return join("/", $config{srcdir}, htmlfn($page));
} #}}}
|
|
|
|
|
2007-04-01 21:59:42 +02:00
|
|
|
sub htmlfn ($) { #{{{
    # Returns the given page name with "." and $config{htmlext} appended.
    my ($page)=@_;
    return $page.".".$config{htmlext};
} #}}}
|
|
|
|
|
2008-02-03 21:17:15 +01:00
|
|
|
# Handle of the aggregate lock file; set by lockaggregate.
my $aggregatelock;

sub lockaggregate () { #{{{
    # Take an exclusive lock to prevent multiple concurrent aggregators.
    # Returns true if the lock was acquired.
    # The state directory is created first if it is missing.
    my $statedir=$config{wikistatedir};
    mkdir($statedir) unless -d $statedir;

    my $lockfile="$statedir/aggregatelock";
    open($aggregatelock, '>', $lockfile) ||
        error ("cannot open to $lockfile: $!");

    # 2 | 4 is LOCK_EX | LOCK_NB
    return 1 if flock($aggregatelock, 2 | 4);

    close($aggregatelock) || error("failed closing aggregatelock: $!");
    return 0;
} #}}}
|
|
|
|
|
|
|
|
sub unlockaggregate () { #{{{
    # Close the aggregate lock file handle if there is one, returning the
    # result of close; returns nothing when no handle is set.
    return unless $aggregatelock;
    return close($aggregatelock);
} #}}}
|
|
|
|
|
2006-07-30 06:31:08 +02:00
|
|
|
1
|