* htmlscrubber security fix: Block javascript in uris.

* Add htmlscrubber test suite.
master
Joey Hess 2008-02-10 13:16:40 -05:00
parent 196d27cbbc
commit d7e0c035e5
4 changed files with 58 additions and 7 deletions

View File

@ -18,6 +18,28 @@ my $_scrubber;
sub scrubber { #{{{
return $_scrubber if defined $_scrubber;
# Only known uri schemes are allowed to avoid all the ways of
# embedding javascript.
# List at http://en.wikipedia.org/wiki/URI_scheme
my $uri_schemes=join("|",
# IANA registered schemes
"http", "https", "ftp", "mailto", "file", "telnet", "gopher",
"aaa", "aaas", "acap", "cap", "cid", "crid",
"dav", "dict", "dns", "fax", "go", "h323", "im", "imap",
"ldap", "mid", "news", "nfs", "nntp", "pop", "pres",
"sip", "sips", "snmp", "tel", "urn", "wais", "xmpp",
"z39.50r", "z39.50s",
# data is a special case. Allow data:text/<image>, but
# disallow data:text/javascript and everything else.
qr/data:text\/(?:png|gif|jpeg)/,
# Selected unofficial schemes
"about", "aim", "callto", "cvs", "ed2k", "feed", "fish", "gg",
"irc", "ircs", "lastfm", "ldaps", "magnet", "mms",
"msnim", "notes", "rsync", "secondlife", "skype", "ssh",
"sftp", "sms", "steam", "webcal", "ymsgr",
);
my $link=qr/^(?:$uri_schemes:|[^:]+$)/i;
eval q{use HTML::Scrubber};
error($@) if $@;
# Lists based on http://feedparser.org/docs/html-sanitization.html
@ -35,23 +57,27 @@ sub scrubber { #{{{
}],
default => [undef, { (
map { $_ => 1 } qw{
abbr accept accept-charset accesskey action
abbr accept accept-charset accesskey
align alt axis border cellpadding cellspacing
char charoff charset checked cite class
clear cols colspan color compact coords
datetime dir disabled enctype for frame
headers height href hreflang hspace id ismap
headers height hreflang hspace id ismap
label lang longdesc maxlength media method
multiple name nohref noshade nowrap prompt
readonly rel rev rows rowspan rules scope
selected shape size span src start summary
selected shape size span start summary
tabindex target title type usemap valign
value vspace width
poster autoplay loopstart loopend end
autoplay loopstart loopend end
playcount controls
} ),
"/" => 1, # emit proper <hr /> XHTML
}],
href => $link,
src => $link,
action => $link,
poster => $link,
}],
);
return $_scrubber;
} # }}}

4
debian/changelog vendored
View File

@ -8,6 +8,10 @@ ikiwiki (2.40) UNRELEASED; urgency=low
the underlay to support either setting of prefix_directives. Add NEWS
entry with migration information.
[ Joey Hess ]
* htmlscrubber security fix: Block javascript in uris.
* Add htmlscrubber test suite.
-- Josh Triplett <josh@freedesktop.org> Sat, 09 Feb 2008 23:01:19 -0800
ikiwiki (2.31) unstable; urgency=low

View File

@ -36,3 +36,4 @@ plugin is active:
* <span style="background: url(javascript:window.location='http://example.org/')">CSS script test</span>
* <span style="&#x61;&#x6e;&#x79;&#x3a;&#x20;&#x65;&#x78;&#x70;&#x72;&#x65;&#x73;&#x73;&#x69;&#x6f;&#x6e;&#x28;&#x77;&#x69;&#x6e;&#x64;&#x6f;&#x77;&#x2e;&#x6c;&#x6f;&#x63;&#x61;&#x74;&#x69;&#x6f;&#x6e;&#x3d;&#x27;&#x68;&#x74;&#x74;&#x70;&#x3a;&#x2f;&#x2f;&#x65;&#x78;&#x61;&#x6d;&#x70;&#x6c;&#x65;&#x2e;&#x6f;&#x72;&#x67;&#x2f;&#x27;&#x29;">entity-encoded CSS script test</span>
* <span style="&#97;&#110;&#121;&#58;&#32;&#101;&#120;&#112;&#114;&#101;&#115;&#115;&#105;&#111;&#110;&#40;&#119;&#105;&#110;&#100;&#111;&#119;&#46;&#108;&#111;&#99;&#97;&#116;&#105;&#111;&#110;&#61;&#39;&#104;&#116;&#116;&#112;&#58;&#47;&#47;&#101;&#120;&#97;&#109;&#112;&#108;&#101;&#46;&#111;&#114;&#103;&#47;&#39;&#41;">entity-encoded CSS script test</span>
* <a href="javascript&#x3A;alert('foo')">click me</a>

View File

@ -1,7 +1,7 @@
#!/usr/bin/perl
use warnings;
use strict;
use Test::More tests => 16;
use Test::More tests => 26;
use Encode;
BEGIN { use_ok("IkiWiki"); }
@ -20,7 +20,6 @@ is(IkiWiki::htmlize("foo", "mdwn", readfile("t/test1.mdwn")),
ok(IkiWiki::htmlize("foo", "mdwn", readfile("t/test2.mdwn")),
"this file crashes markdown if it's fed in as decoded utf-8");
# embedded javascript sanitisation tests
sub gotcha {
my $html=IkiWiki::htmlize("foo", "mdwn", shift);
return $html =~ /GOTCHA/;
@ -41,10 +40,31 @@ ok(!gotcha(q{<span style="&#97;&#110;&#121;&#58;&#32;&#101;&#120;&#112;&#114;&#1
"another entity-encoded CSS script test");
ok(!gotcha(q{<script>GOTCHA</script>}),
"script tag");
ok(!gotcha(q{<form action="javascript:alert('GOTCHA')">foo</form>}),
"form action with javascript");
ok(!gotcha(q{<video poster="javascript:alert('GOTCHA')" href="foo.avi">foo</video>}),
"video poster with javascript");
ok(!gotcha(q{<span style="background: url(javascript:window.location=GOTCHA)">a</span>}),
"CSS script test");
ok(! gotcha(q{<img src="data:text/javascript:GOTCHA">}),
"data:text/javascript (jeez!)");
ok(gotcha(q{<img src="data:text/png:GOTCHA">}), "data:text/png");
ok(gotcha(q{<img src="data:text/gif:GOTCHA">}), "data:text/gif");
ok(gotcha(q{<img src="data:text/jpeg:GOTCHA">}), "data:text/jpeg");
ok(gotcha(q{<p>javascript:alert('GOTCHA')</p>}),
"not javascript AFAIK (but perhaps some web browser would like to
be perverse and assume it is?)");
ok(gotcha(q{<img src="javascript.png?GOTCHA">}), "not javascript");
ok(gotcha(q{<a href="javascript.png?GOTCHA">foo</a>}), "not javascript");
is(IkiWiki::htmlize("foo", "mdwn",
q{<img alt="foo" src="foo.gif">}),
q{<img alt="foo" src="foo.gif">}, "img with alt tag allowed");
is(IkiWiki::htmlize("foo", "mdwn",
q{<a href="http://google.com/">}),
q{<a href="http://google.com/">}, "absolute url allowed");
is(IkiWiki::htmlize("foo", "mdwn",
q{<a href="foo.html">}),
q{<a href="foo.html">}, "relative url allowed");
is(IkiWiki::htmlize("foo", "mdwn",
q{<span class="foo">bar</span>}),
q{<span class="foo">bar</span>}, "class attribute allowed");