useragent: Automatically choose whether to use LWPx::ParanoidAgent

The simple implementation of this, which I'd prefer to use, would be:
if we can import LWPx::ParanoidAgent, use it; otherwise, use LWP::UserAgent.

However, aggregate has historically worked with proxies, and
LWPx::ParanoidAgent quite reasonably refuses to work with proxies
(because it can't know whether those proxies are going to do the same
filtering that LWPx::ParanoidAgent would).

Signed-off-by: Simon McVittie <>
Simon McVittie 2019-02-10 17:22:06 +00:00
parent 67543ce1d6
commit d283e4ca1a
6 changed files with 458 additions and 36 deletions

View File

@ -2469,16 +2469,131 @@ sub add_autofile ($$$) {
# useragent(for_url => $url_or_undef)
#
# Build the HTTP client object used for outgoing requests.
#
# NOTE(review): this span is a flattened diff with its braces stripped.
# The first "sub useragent () {" line and the bare
# "return LWP::UserAgent->new(... env_proxy => 1" lines look like the
# removed old implementation interleaved with the new one -- confirm
# against the real file before relying on statement order here.
#
# Named parameters:
#   for_url - the URL that is about to be fetched, or undef if the
#             caller does not know it yet.
#   Any other key makes the function call error(), so a caller that
#   depends on an option this version does not know is not silently
#   ignored.
#
# Returns:
#   - an LWP::UserAgent with explicit per-scheme proxies, when a proxy
#     applies;
#   - otherwise an LWPx::ParanoidAgent, if that module can be loaded;
#   - otherwise a plain LWP::UserAgent, after printing a warning to
#     STDERR.
sub useragent () {
sub useragent (@) {
my %params = @_;
my $for_url = delete $params{for_url};
# Fail safe, in case a plugin calling this function is relying on
# a future parameter to make the UA more strict
foreach my $key (keys %params) {
error "Internal error: useragent(\"$key\" => ...) not understood";
eval q{use LWP};
error($@) if $@;
return LWP::UserAgent->new(
cookie_jar => $config{cookiejar},
env_proxy => 1, # respect proxy env vars
# Constructor arguments shared by every agent returned below. LWP's own
# environment-proxy handling is switched off (env_proxy => 0); proxies
# are chosen explicitly by the code that follows.
my %args = (
agent => $config{useragent},
cookie_jar => $config{cookiejar},
env_proxy => 0,
protocols_allowed => [qw(http https)],
# scheme => proxy URL; left empty when no proxy applies
my %proxies;
if (defined $for_url) {
# We know which URL we're going to fetch, so we can choose
# whether it's going to go through a proxy or not.
# We reimplement http_proxy, https_proxy and no_proxy here, so
# that we are not relying on LWP implementing them exactly the
# same way we do.
eval q{use URI};
error($@) if $@;
my $proxy;
my $uri = URI->new($for_url);
if ($uri->scheme eq 'http') {
$proxy = $ENV{http_proxy};
# HTTP_PROXY is deliberately not implemented
# because the HTTP_* namespace is also used by CGI
elsif ($uri->scheme eq 'https') {
$proxy = $ENV{https_proxy};
$proxy = $ENV{HTTPS_PROXY} unless defined $proxy;
else {
$proxy = undef;
# Both spellings of the exclusion list are consulted; a match in
# either one clears $proxy.
foreach my $var (qw(no_proxy NO_PROXY)) {
my $no_proxy = $ENV{$var};
if (defined $no_proxy) {
foreach my $domain (split /\s*,\s*/, $no_proxy) {
if ($domain =~ s/^\*?\.//) {
# Entry written as "*.example.com" or ".example.com":
# the leading "*." or "." has just been stripped, and
# the remainder is matched case-insensitively against
# the host itself or any subdomain of it.
if ($uri->host =~ m/(^|\.)\Q$domain\E$/i) {
$proxy = undef;
else {
# Entry written as a bare name such as "example.com":
# match the host name exactly (case-insensitively).
if (lc $uri->host eq lc $domain) {
$proxy = undef;
if (defined $proxy) {
$proxies{$uri->scheme} = $proxy;
# Paranoia: make sure we can't bypass the proxy
$args{protocols_allowed} = [$uri->scheme];
else {
# The plugin doesn't know yet which URL(s) it's going to
# fetch, so we have to make some conservative assumptions.
my $http_proxy = $ENV{http_proxy};
my $https_proxy = $ENV{https_proxy};
$https_proxy = $ENV{HTTPS_PROXY} unless defined $https_proxy;
# We don't respect no_proxy here: if we are not using the
# paranoid user-agent, then we need to give the proxy the
# opportunity to reject undesirable requests.
# If we have one, we need the other: otherwise, neither
# LWPx::ParanoidAgent nor the proxy would have the
# opportunity to filter requests for the other protocol.
if (defined $https_proxy && defined $http_proxy) {
%proxies = (http => $http_proxy, https => $https_proxy);
elsif (defined $https_proxy) {
%proxies = (http => $https_proxy, https => $https_proxy);
elsif (defined $http_proxy) {
%proxies = (http => $http_proxy, https => $http_proxy);
if (scalar keys %proxies) {
# The configured proxy is responsible for deciding which
# URLs are acceptable to fetch and which URLs are not.
my $ua = LWP::UserAgent->new(%args);
# Sanity check: every scheme the agent allows must have a proxy.
foreach my $scheme (@{$ua->protocols_allowed}) {
unless ($proxies{$scheme}) {
error "internal error: $scheme is allowed but has no proxy";
# We can't pass the proxies in %args because that only
# works since LWP 6.24.
foreach my $scheme (keys %proxies) {
$ua->proxy($scheme, $proxies{$scheme});
return $ua;
# No proxy applies: prefer LWPx::ParanoidAgent, and fall back to a plain
# LWP::UserAgent (with a warning on STDERR) if it cannot be loaded.
eval q{use LWPx::ParanoidAgent};
if ($@) {
print STDERR "warning: installing LWPx::ParanoidAgent is recommended\n";
return LWP::UserAgent->new(%args);
return LWPx::ParanoidAgent->new(%args);
sub sortspec_translate ($$) {

View File

@ -513,7 +513,10 @@ sub aggregate (@) {
$feed->{feedurl}=pop @urls;
my $ua=useragent();
# Using the for_url parameter makes sure we crash if used
# with an older IkiWiki.pm that didn't automatically try
# to use LWPx::ParanoidAgent.
my $ua=useragent(for_url => $feed->{feedurl});
my $res=URI::Fetch->fetch($feed->{feedurl}, UserAgent=>$ua);
if (! $res) {

View File

@ -57,18 +57,10 @@ sub checkconfig () {
error $@ if $@;
eval q{use LWPx::ParanoidAgent};
if (!$@) {
$client=LWPx::ParanoidAgent->new(agent => $config{useragent});
else {
eval q{use LWP};
if ($@) {
error $@;
# Using the for_url parameter makes sure we crash if used
# with an older IkiWiki.pm that didn't automatically try
# to use LWPx::ParanoidAgent.
$client=useragent(for_url => $config{blogspam_server});
sub checkcontent (@) {

View File

@ -219,14 +219,10 @@ sub getobj ($$) {
eval q{use Net::OpenID::Consumer};
error($@) if $@;
my $ua;
eval q{use LWPx::ParanoidAgent};
if (! $@) {
$ua=LWPx::ParanoidAgent->new(agent => $config{useragent});
else {
# We pass the for_url parameter, even though it's undef, because
# that will make sure we crash if used with an older IkiWiki.pm
# that didn't automatically try to use LWPx::ParanoidAgent.
my $ua=useragent(for_url => undef);
# Store the secret in the session.
my $secret=$session->param("openid_secret");

View File

@ -70,18 +70,17 @@ sub ping {
eval q{use Net::INET6Glue::INET_is_INET6}; # may not be available
my $ua;
eval q{use LWPx::ParanoidAgent};
if (!$@) {
$ua=LWPx::ParanoidAgent->new(agent => $config{useragent});
else {
eval q{use LWP};
eval {
# We pass the for_url parameter, even though it's
# undef, because that will make sure we crash if used
# with an older IkiWiki.pm that didn't automatically
# try to use LWPx::ParanoidAgent.
$ua=useragent(for_url => undef);
if ($@) {
debug(gettext("LWP not found, not pinging"));
debug(gettext("LWP not found, not pinging").": $@");
$ua->timeout($config{pinger_timeout} || 15);
# daemonise here so slow pings don't slow down wiki updates

t/useragent.t 100755
View File

@ -0,0 +1,317 @@
use warnings;
use strict;
use Test::More;
# Probe for the HTTP client modules before running any checks: the whole
# test file is skipped when LWP cannot be loaded, and $have_paranoid_agent
# records whether LWPx::ParanoidAgent could be loaded. The last two
# statements check that an unrecognised named parameter makes useragent()
# die.
# NOTE(review): in this view the closing "}" of both eval q{...} strings
# and whatever brings useragent() into scope are not visible -- confirm
# against the real file.
my $have_paranoid_agent;
plan(skip_all => 'LWP not available')
unless eval q{
use LWP qw(); 1;
$have_paranoid_agent = eval q{
use LWPx::ParanoidAgent qw(); 1;
eval { useragent(future_feature => 1); };
ok($@, 'future features should cause useragent to fail');
# Scenario: no proxy-related environment variables are set. The agent is
# expected to be an LWPx::ParanoidAgent (when that module is installed),
# to allow http and https, and to have no proxy for either scheme.
# NOTE(review): the second for_url literal is an empty string in this
# view; presumably a real URL was intended. The skip() calls also have no
# visible enclosing SKIP: block. Confirm both against the real file.
diag "==== No proxy ====";
delete $ENV{http_proxy};
delete $ENV{https_proxy};
# useragent() falls back to HTTPS_PROXY when https_proxy is unset, so a
# value inherited from the caller's environment must be cleared as well
# for this scenario to be truly proxy-free.
delete $ENV{HTTPS_PROXY};
delete $ENV{no_proxy};
delete $ENV{NO_PROXY};
diag "---- Unspecified URL ----";
my $ua = useragent(for_url => undef);
skip 'paranoid agent not available', 1 unless $have_paranoid_agent;
ok($ua->isa('LWPx::ParanoidAgent'), 'uses ParanoidAgent if possible');
is_deeply([sort @{$ua->protocols_allowed}], [sort qw(http https)]);
is($ua->proxy('http'), undef, 'No http proxy');
is($ua->proxy('https'), undef, 'No https proxy');
diag "---- Specified URL ----";
$ua = useragent(for_url => '');
skip 'paranoid agent not available', 1 unless $have_paranoid_agent;
ok($ua->isa('LWPx::ParanoidAgent'), 'uses ParanoidAgent if possible');
is_deeply([sort @{$ua->protocols_allowed}], [sort qw(http https)]);
is($ua->proxy('http'), undef, 'No http proxy');
is($ua->proxy('https'), undef, 'No https proxy');
# Scenario: http_proxy and https_proxy are both set and there is no
# exclusion list. A plain (non-Paranoid) agent is expected. With an
# unknown URL both schemes stay allowed and each gets its own proxy; with
# a known URL only one scheme is expected to remain allowed.
# NOTE(review): the for_url literals for the "known URL" cases are empty
# strings in this view; the assertions imply an http URL and then an
# https URL were intended -- confirm against the real file.
diag "==== Proxy for everything ====";
$ENV{http_proxy} = 'http://proxy:8080';
$ENV{https_proxy} = 'http://sproxy:8080';
delete $ENV{no_proxy};
delete $ENV{NO_PROXY};
diag "---- Unspecified URL ----";
$ua = useragent(for_url => undef);
ok(! $ua->isa('LWPx::ParanoidAgent'), 'should use proxy instead of ParanoidAgent');
is_deeply([sort @{$ua->protocols_allowed}], [sort qw(http https)]);
is($ua->proxy('http'), 'http://proxy:8080', 'should use proxy');
is($ua->proxy('https'), 'http://sproxy:8080', 'should use CONNECT proxy');
$ua = useragent(for_url => '');
ok(! $ua->isa('LWPx::ParanoidAgent'), 'should use proxy instead of ParanoidAgent');
is_deeply([sort @{$ua->protocols_allowed}], [sort qw(http)]);
is($ua->proxy('http'), 'http://proxy:8080', 'should use proxy');
# We don't care what $ua->proxy('https') is, because it won't be used
$ua = useragent(for_url => '');
ok(! $ua->isa('LWPx::ParanoidAgent'), 'should use proxy instead of ParanoidAgent');
is_deeply([sort @{$ua->protocols_allowed}], [sort qw(https)]);
# We don't care what $ua->proxy('http') is, because it won't be used
is($ua->proxy('https'), 'http://sproxy:8080', 'should use CONNECT proxy');
# Scenario: both proxies are set and the lower-case no_proxy variable
# lists exclusions. With an unknown URL the exclusion list is expected to
# be ignored (both schemes proxied). With a known URL, a host matching an
# exclusion is expected to get LWPx::ParanoidAgent with no proxy, and any
# other host a plain agent restricted to that URL's scheme and its proxy.
# NOTE(review): the no_proxy value, the for_url literals and parts of the
# diag labels look as if host names were stripped from this view, and the
# skip() calls have no visible enclosing SKIP: block -- confirm against
# the real file.
diag "==== Selective proxy ====";
$ENV{http_proxy} = 'http://proxy:8080';
$ENV{https_proxy} = 'http://sproxy:8080';
$ENV{no_proxy} = '*,,';
delete $ENV{NO_PROXY};
diag "---- Unspecified URL ----";
$ua = useragent(for_url => undef);
ok(! $ua->isa('LWPx::ParanoidAgent'), 'should use proxy instead of ParanoidAgent');
is_deeply([sort @{$ua->protocols_allowed}], [sort qw(http https)]);
is($ua->proxy('http'), 'http://proxy:8080', 'should use proxy');
is($ua->proxy('https'), 'http://sproxy:8080', 'should use CONNECT proxy');
diag "---- Exact match for no_proxy ----";
$ua = useragent(for_url => '');
skip 'paranoid agent not available', 1 unless $have_paranoid_agent;
ok($ua->isa('LWPx::ParanoidAgent'), 'uses ParanoidAgent if possible');
is_deeply([sort @{$ua->protocols_allowed}], [sort qw(http https)]);
is($ua->proxy('http'), undef);
is($ua->proxy('https'), undef);
diag "---- Subdomain of exact domain in no_proxy ----";
$ua = useragent(for_url => '');
ok(! $ua->isa('LWPx::ParanoidAgent'), 'should use proxy instead of ParanoidAgent');
is_deeply([sort @{$ua->protocols_allowed}], [sort qw(http)]);
is($ua->proxy('http'), 'http://proxy:8080', 'should use proxy');
diag "---- matches * ----";
$ua = useragent(for_url => '');
skip 'paranoid agent not available', 1 unless $have_paranoid_agent;
ok($ua->isa('LWPx::ParanoidAgent'), 'uses ParanoidAgent if possible');
is_deeply([sort @{$ua->protocols_allowed}], [sort qw(http https)]);
is($ua->proxy('http'), undef);
is($ua->proxy('https'), undef);
diag "---- matches * ----";
$ua = useragent(for_url => '');
skip 'paranoid agent not available', 1 unless $have_paranoid_agent;
ok($ua->isa('LWPx::ParanoidAgent'), 'uses ParanoidAgent if possible');
is_deeply([sort @{$ua->protocols_allowed}], [sort qw(http https)]);
is($ua->proxy('http'), undef);
is($ua->proxy('https'), undef);
diag "---- does not match * ----";
$ua = useragent(for_url => '');
ok(! $ua->isa('LWPx::ParanoidAgent'), 'should use proxy instead of ParanoidAgent');
is_deeply([sort @{$ua->protocols_allowed}], [sort qw(https)]);
is($ua->proxy('https'), 'http://sproxy:8080', 'should use proxy');
diag "---- matches ----";
$ua = useragent(for_url => '');
skip 'paranoid agent not available', 1 unless $have_paranoid_agent;
ok($ua->isa('LWPx::ParanoidAgent'), 'uses ParanoidAgent if possible');
is_deeply([sort @{$ua->protocols_allowed}], [sort qw(http https)]);
is($ua->proxy('http'), undef);
is($ua->proxy('https'), undef);
diag "---- matches ----";
$ua = useragent(for_url => '');
skip 'paranoid agent not available', 1 unless $have_paranoid_agent;
ok($ua->isa('LWPx::ParanoidAgent'), 'uses ParanoidAgent if possible');
is_deeply([sort @{$ua->protocols_allowed}], [sort qw(http https)]);
is($ua->proxy('http'), undef);
is($ua->proxy('https'), undef);
diag "---- does not match ----";
$ua = useragent(for_url => '');
ok(! $ua->isa('LWPx::ParanoidAgent'), 'should use proxy instead of ParanoidAgent');
is_deeply([sort @{$ua->protocols_allowed}], [sort qw(https)]);
is($ua->proxy('https'), 'http://sproxy:8080', 'should use proxy');
# Scenario: the same expectations as a selective-proxy setup, but the
# https proxy and the exclusion list are supplied through the upper-case
# HTTPS_PROXY and NO_PROXY variables while their lower-case counterparts
# are unset. http_proxy stays lower-case.
# NOTE(review): the NO_PROXY value, the for_url literals and parts of the
# diag labels look as if host names were stripped from this view, and the
# skip() calls have no visible enclosing SKIP: block -- confirm against
# the real file.
diag "==== Selective proxy (alternate variables) ====";
$ENV{http_proxy} = 'http://proxy:8080';
delete $ENV{https_proxy};
$ENV{HTTPS_PROXY} = 'http://sproxy:8080';
delete $ENV{no_proxy};
$ENV{NO_PROXY} = '*,,';
diag "---- Unspecified URL ----";
$ua = useragent(for_url => undef);
ok(! $ua->isa('LWPx::ParanoidAgent'), 'should use proxy instead of ParanoidAgent');
is_deeply([sort @{$ua->protocols_allowed}], [sort qw(http https)]);
is($ua->proxy('http'), 'http://proxy:8080', 'should use proxy');
is($ua->proxy('https'), 'http://sproxy:8080', 'should use CONNECT proxy');
diag "---- Exact match for no_proxy ----";
$ua = useragent(for_url => '');
skip 'paranoid agent not available', 1 unless $have_paranoid_agent;
ok($ua->isa('LWPx::ParanoidAgent'), 'uses ParanoidAgent if possible');
is_deeply([sort @{$ua->protocols_allowed}], [sort qw(http https)]);
is($ua->proxy('http'), undef);
is($ua->proxy('https'), undef);
diag "---- Subdomain of exact domain in no_proxy ----";
$ua = useragent(for_url => '');
ok(! $ua->isa('LWPx::ParanoidAgent'), 'should use proxy instead of ParanoidAgent');
is_deeply([sort @{$ua->protocols_allowed}], [sort qw(http)]);
is($ua->proxy('http'), 'http://proxy:8080', 'should use proxy');
diag "---- matches * ----";
$ua = useragent(for_url => '');
skip 'paranoid agent not available', 1 unless $have_paranoid_agent;
ok($ua->isa('LWPx::ParanoidAgent'), 'uses ParanoidAgent if possible');
is_deeply([sort @{$ua->protocols_allowed}], [sort qw(http https)]);
is($ua->proxy('http'), undef);
is($ua->proxy('https'), undef);
diag "---- matches * ----";
$ua = useragent(for_url => '');
skip 'paranoid agent not available', 1 unless $have_paranoid_agent;
ok($ua->isa('LWPx::ParanoidAgent'), 'uses ParanoidAgent if possible');
is_deeply([sort @{$ua->protocols_allowed}], [sort qw(http https)]);
is($ua->proxy('http'), undef);
is($ua->proxy('https'), undef);
diag "---- does not match * ----";
$ua = useragent(for_url => '');
ok(! $ua->isa('LWPx::ParanoidAgent'), 'should use proxy instead of ParanoidAgent');
is_deeply([sort @{$ua->protocols_allowed}], [sort qw(https)]);
is($ua->proxy('https'), 'http://sproxy:8080', 'should use proxy');
diag "---- matches ----";
$ua = useragent(for_url => '');
skip 'paranoid agent not available', 1 unless $have_paranoid_agent;
ok($ua->isa('LWPx::ParanoidAgent'), 'uses ParanoidAgent if possible');
is_deeply([sort @{$ua->protocols_allowed}], [sort qw(http https)]);
is($ua->proxy('http'), undef);
is($ua->proxy('https'), undef);
diag "---- matches ----";
$ua = useragent(for_url => '');
skip 'paranoid agent not available', 1 unless $have_paranoid_agent;
ok($ua->isa('LWPx::ParanoidAgent'), 'uses ParanoidAgent if possible');
is_deeply([sort @{$ua->protocols_allowed}], [sort qw(http https)]);
is($ua->proxy('http'), undef);
is($ua->proxy('https'), undef);
diag "---- does not match ----";
$ua = useragent(for_url => '');
ok(! $ua->isa('LWPx::ParanoidAgent'), 'should use proxy instead of ParanoidAgent');
is_deeply([sort @{$ua->protocols_allowed}], [sort qw(https)]);
is($ua->proxy('https'), 'http://sproxy:8080', 'should use proxy');
# Scenario: lower-case and upper-case variables are set at the same time.
# The assertions expect https_proxy to win over HTTPS_PROXY, and entries
# from both no_proxy and NO_PROXY to be honoured.
# NOTE(review): the no_proxy/NO_PROXY values, the for_url literals and
# parts of the diag labels look as if host names were stripped from this
# view, and the skip() calls have no visible enclosing SKIP: block --
# confirm against the real file.
diag "==== Selective proxy (many variables) ====";
$ENV{http_proxy} = 'http://proxy:8080';
$ENV{https_proxy} = 'http://sproxy:8080';
# This one should be ignored in favour of https_proxy
$ENV{HTTPS_PROXY} = 'http://not.preferred.proxy:3128';
# These two should be merged
$ENV{no_proxy} = '*,';
$ENV{NO_PROXY} = '';
diag "---- Unspecified URL ----";
$ua = useragent(for_url => undef);
ok(! $ua->isa('LWPx::ParanoidAgent'), 'should use proxy instead of ParanoidAgent');
is_deeply([sort @{$ua->protocols_allowed}], [sort qw(http https)]);
is($ua->proxy('http'), 'http://proxy:8080', 'should use proxy');
is($ua->proxy('https'), 'http://sproxy:8080', 'should use CONNECT proxy');
diag "---- Exact match for no_proxy ----";
$ua = useragent(for_url => '');
skip 'paranoid agent not available', 1 unless $have_paranoid_agent;
ok($ua->isa('LWPx::ParanoidAgent'), 'uses ParanoidAgent if possible');
is_deeply([sort @{$ua->protocols_allowed}], [sort qw(http https)]);
is($ua->proxy('http'), undef);
is($ua->proxy('https'), undef);
diag "---- Subdomain of exact domain in no_proxy ----";
$ua = useragent(for_url => '');
ok(! $ua->isa('LWPx::ParanoidAgent'), 'should use proxy instead of ParanoidAgent');
is_deeply([sort @{$ua->protocols_allowed}], [sort qw(http)]);
is($ua->proxy('http'), 'http://proxy:8080', 'should use proxy');
diag "---- matches * ----";
$ua = useragent(for_url => '');
skip 'paranoid agent not available', 1 unless $have_paranoid_agent;
ok($ua->isa('LWPx::ParanoidAgent'), 'uses ParanoidAgent if possible');
is_deeply([sort @{$ua->protocols_allowed}], [sort qw(http https)]);
is($ua->proxy('http'), undef);
is($ua->proxy('https'), undef);
diag "---- matches * ----";
$ua = useragent(for_url => '');
skip 'paranoid agent not available', 1 unless $have_paranoid_agent;
ok($ua->isa('LWPx::ParanoidAgent'), 'uses ParanoidAgent if possible');
is_deeply([sort @{$ua->protocols_allowed}], [sort qw(http https)]);
is($ua->proxy('http'), undef);
is($ua->proxy('https'), undef);
diag "---- does not match * ----";
$ua = useragent(for_url => '');
ok(! $ua->isa('LWPx::ParanoidAgent'), 'should use proxy instead of ParanoidAgent');
is_deeply([sort @{$ua->protocols_allowed}], [sort qw(https)]);
is($ua->proxy('https'), 'http://sproxy:8080', 'should use proxy');
# Scenario: only one of the two proxy variables is set and the URL is not
# known in advance. useragent() is expected to route both http and https
# through whichever single proxy is configured, and to return a plain
# (non-Paranoid) agent.
diag "==== One but not the other ====\n";
$ENV{http_proxy} = 'http://proxy:8080';
delete $ENV{https_proxy};
# HTTPS_PROXY is consulted as a fallback for https_proxy, and an earlier
# scenario in this file leaves it set; clear it so that only http_proxy
# is in effect here. Without this, https would be routed through the
# stale HTTPS_PROXY value instead of http://proxy:8080.
delete $ENV{HTTPS_PROXY};
delete $ENV{no_proxy};
delete $ENV{NO_PROXY};
$ua = useragent(for_url => undef);
ok(! $ua->isa('LWPx::ParanoidAgent'), 'should use proxy instead of ParanoidAgent');
is_deeply([sort @{$ua->protocols_allowed}], [sort qw(http https)]);
is($ua->proxy('http'), 'http://proxy:8080', 'should use proxy');
is($ua->proxy('https'), 'http://proxy:8080', 'should use proxy');
delete $ENV{http_proxy};
$ENV{https_proxy} = 'http://sproxy:8080';
# Not strictly needed while https_proxy is set (it takes precedence), but
# keeps this case independent of the upper-case variable.
delete $ENV{HTTPS_PROXY};
delete $ENV{no_proxy};
delete $ENV{NO_PROXY};
$ua = useragent(for_url => undef);
ok(! $ua->isa('LWPx::ParanoidAgent'), 'should use proxy instead of ParanoidAgent');
is_deeply([sort @{$ua->protocols_allowed}], [sort qw(http https)]);
is($ua->proxy('http'), 'http://sproxy:8080', 'should use proxy');
is($ua->proxy('https'), 'http://sproxy:8080', 'should use proxy');