blogspam: New plugin, adding spam filtering for page editing / comment posting using the BlogSpam.net API.
parent
16c56af605
commit
7ee92cab40
|
@ -0,0 +1,111 @@
|
|||
#!/usr/bin/perl
|
||||
package IkiWiki::Plugin::blogspam;
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
use IkiWiki 3.00;
|
||||
require RPC::XML;
|
||||
require RPC::XML::Client;
|
||||
|
||||
# XML-RPC endpoint used when no other blogspam server url is supplied; also
# advertised as the default value of the blogspam_server setting.
my $defaulturl='http://test.blogspam.net:8888/';
|
||||
|
||||
# Called when the plugin is loaded: registers this plugin's hooks under the
# id "blogspam".
sub import {
	# Hook type => handler, kept as ordered pairs so the hooks are
	# registered in a fixed order (getsetup first, then checkcontent).
	my @registrations=(
		[getsetup => \&getsetup],
		[checkcontent => \&checkcontent],
	);

	foreach my $registration (@registrations) {
		my ($type, $handler)=@$registration;
		hook(type => $type, id => "blogspam", call => $handler);
	}
}
|
||||
|
||||
# getsetup hook: describes the plugin and its configuration settings.
#
# Returns a flat list of name => description-hash pairs, in this order:
# plugin, blogspam_pagespec, blogspam_options, blogspam_server.
sub getsetup () {
	# Every entry carries the same safe/rebuild flags.
	my @flags=(safe => 1, rebuild => 0);

	# Which pages get checked.
	my %pagespec_setting=(
		type => 'pagespec',
		example => 'postcomment(*)',
		description => 'PageSpec of pages to check for spam',
		link => 'ikiwiki/PageSpec',
		@flags,
	);

	# Comma-separated options forwarded to the blogspam server.
	my %options_setting=(
		type => "string",
		example => "blacklist=1.2.3.4,blacklist=8.7.6.5,max-links=10",
		description => "options to send to blogspam server",
		link => "http://blogspam.net/api/testComment.html#options",
		@flags,
	);

	# Url of the server to query; defaults to $defaulturl.
	my %server_setting=(
		type => "string",
		default => $defaulturl,
		description => "blogspam server XML-RPC url",
		@flags,
	);

	return (
		plugin => { @flags },
		blogspam_pagespec => \%pagespec_setting,
		blogspam_options => \%options_setting,
		blogspam_server => \%server_setting,
	);
}
|
||||
|
||||
# checkcontent hook: asks the blogspam server whether submitted content
# looks like spam.
#
# Named parameters read from the hook arguments:
#   page     - name of the page being edited or commented on
#   content  - the text to check
#   url      - optional author url, appended to the checked content
#   subject  - optional subject, sent as "" when undefined
#   author   - optional author name, sent as "" when undefined
#
# Configuration read from %config: blogspam_pagespec, blogspam_options,
# blogspam_server, and url (sent as the site).
#
# Returns undef when the content is accepted, when the page is not covered
# by blogspam_pagespec, or when the server gives no usable answer (the check
# fails open, and the problem is reported via debug()). Returns a message
# string when the server classifies the content as spam.
sub checkcontent (@) {
	my %params=@_;

	# An unset or empty pagespec means the default: check everything.
	if (defined $config{blogspam_pagespec} &&
	    length $config{blogspam_pagespec}) {
		return undef
			if ! pagespec_match($params{page},
				$config{blogspam_pagespec},
				location => $params{page});
	}

	# The settings declared by getsetup live in %config, not in the
	# hook parameters.
	my $url=$defaulturl;
	$url=$config{blogspam_server}
		if defined $config{blogspam_server} &&
		   length $config{blogspam_server};
	my $client=RPC::XML::Client->new($url);

	# Declared separately from the conditional assignment, since
	# "my ... if ..." has undefined behaviour.
	my @options;
	@options=split(/,/, $config{blogspam_options})
		if defined $config{blogspam_options};

	# Allow short comments and whitespace-only edits, unless the user
	# has overridden min-words themselves.
	push @options, "min-words=0"
		unless grep /^min-words=/i, @options;
	# Wiki pages can have a lot of urls, unless the user specifically
	# wants to limit them.
	push @options, "exclude=lotsaurls"
		unless grep /^max-links/i, @options;
	# Unless the user specified a size check, disable such checking.
	push @options, "exclude=size"
		unless grep /^(?:max|min)-size/i, @options;
	# This test has absurd false positives on words like "alpha"
	# and "buy".
	push @options, "exclude=stopwords";

	# blogspam API does not have a field for author url, so put it in
	# the content to be checked. (%params is a local copy, so the
	# caller's content is not modified.)
	if (defined $params{url}) {
		$params{content}.="\n".$params{url};
	}

	my $res=$client->send_request('testComment', {
		ip => $ENV{REMOTE_ADDR},
		comment => $params{content},
		subject => defined $params{subject} ? $params{subject} : "",
		name => defined $params{author} ? $params{author} : "",
		options => join(",", @options),
		site => $config{url},
		version => "ikiwiki ".$IkiWiki::version,
	});

	# A non-reference result or a missing value is treated as a failed
	# request; let the content through rather than block editing.
	if (! ref $res || ! defined $res->value) {
		debug("failed to get response from blogspam server ($url)");
		return undef;
	}

	my $verdict=$res->value;
	if ($verdict =~ /^SPAM:(.*)/) {
		# Save the capture before calling anything else.
		my $reason=$1;
		return gettext("Sorry, but that looks like spam to <a href=\"http://blogspam.net/\">blogspam</a>: ").$reason;
	}
	elsif ($verdict ne 'OK') {
		debug(gettext("blogspam server failure: ").$verdict);
	}

	return undef;
}
|
||||
|
||||
1
|
|
@ -20,6 +20,8 @@ ikiwiki (3.02) UNRELEASED; urgency=low
|
|||
in blogging.
|
||||
* checkcontent: New hook, can be used to implement arbitrary content
|
||||
filters, including spam filters.
|
||||
* blogspam: New plugin, adding spam filtering for page editing / comment
|
||||
posting using the BlogSpam.net API.
|
||||
|
||||
-- Joey Hess <joeyh@debian.org> Tue, 06 Jan 2009 15:02:52 -0500
|
||||
|
||||
|
|
|
@ -0,0 +1,23 @@
|
|||
[[!template id=plugin name=blogspam author="[[Joey]]"]]
|
||||
[[!tag type/auth]]
|
||||
|
||||
This plugin adds antispam support to ikiwiki, using the
|
||||
[blogspam.net](http://blogspam.net/) API. Both page edits and
|
||||
[[comment|comments]] postings can be checked for spam. Currently,
|
||||
detected spam is not saved for human review; it is just rejected.
|
||||
|
||||
You can control how content is tested via the `blogspam_options`
|
||||
setting. By default, the options are configured in a way that is
|
||||
appropriate for wiki content. This includes turning off some of the
|
||||
more problematic tests.
|
||||
|
||||
The `blogspam_pagespec` setting is a [[ikiwiki/PageSpec]] that can be
|
||||
used to configure which pages are checked for spam. The default is to check
|
||||
all edits. If you only want to check [[comments]] (not wiki page edits),
|
||||
set it to "postcomment(*)".
|
||||
|
||||
By default, the blogspam.net server is used to do the spam checking. To
|
||||
change this, the `blogspam_server` option can be set to the url for a
|
||||
different server implementing the same API. Note that content is sent
|
||||
unencrypted over the internet to the server, and the server sees
|
||||
the full text of the content.
|
|
@ -17,3 +17,14 @@ Cheers,
|
|||
You might look at the Wikipedia page on "Spam\_in\_blogs" for more ideas. In particular, would it be possible to force a subset of the pages (by regex, but you'd choose the regex to match those pages which are publicly writable) to use rel="nofollow" in all links.
|
||||
|
||||
> I just wanted to leave a link here to the [[todo/require_CAPTCHA_to_edit]] plugin patch. Unfortunately that plugin currently interacts badly with the openid plugin. -- [[Will]]
|
||||
|
||||
|
||||
---
|
||||
|
||||
Ikiwiki now has a checkcontent hook that plugins can use to see content
|
||||
that is being entered and check it for spam/whatever.
|
||||
|
||||
There is a blogspam plugin that uses the blogspam.net service
|
||||
to check for common spam signatures. --[[Joey]]
|
||||
|
||||
[[done]]
|
||||
|
|
|
@ -5,8 +5,8 @@ use Test::More;
|
|||
|
||||
my @progs="ikiwiki.in";
|
||||
my @libs="IkiWiki.pm";
|
||||
# monotone, external, amazon_s3 skipped since they need perl modules
|
||||
push @libs, map { chomp; $_ } `find IkiWiki -type f -name \\*.pm | grep -v monotone.pm | grep -v external.pm | grep -v amazon_s3.pm`;
|
||||
# monotone, external, blogspam, amazon_s3 skipped since they need perl modules
|
||||
push @libs, map { chomp; $_ } `find IkiWiki -type f -name \\*.pm | grep -v monotone.pm | grep -v external.pm | grep -v blogspam.pm | grep -v amazon_s3.pm`;
|
||||
push @libs, 'IkiWiki/Plugin/skeleton.pm.example';
|
||||
|
||||
plan(tests => (@progs + @libs));
|
||||
|
|
Loading…
Reference in New Issue