htmlbalance: new plugin that balances tags by parsing and re-serializing
parent
408d483dc2
commit
e7a840ed9a
|
@ -0,0 +1,57 @@
|
|||
#!/usr/bin/perl
|
||||
package IkiWiki::Plugin::htmlbalance;
|
||||
|
||||
# htmlbalance: Parse and re-serialize HTML to ensure balanced tags
|
||||
#
|
||||
# Copyright 2008 Simon McVittie <http://smcv.pseudorandom.co.uk/>
|
||||
# Licensed under the GNU GPL, version 2, or any later version published by the
|
||||
# Free Software Foundation
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
use IkiWiki 2.00;
|
||||
|
||||
sub import { #{{{
    # Both callbacks are registered with hook() under the same plugin id;
    # getsetup is registered first, then sanitize.
    my %shared = (id => "htmlbalance");

    hook(type => "getsetup", %shared, call => \&getsetup);
    hook(type => "sanitize", %shared, call => \&sanitize);
} # }}}
|
||||
|
||||
# Returns a two-element list: the key "plugin" followed by a reference to
# a fresh hash holding safe => 1 and rebuild => undef.
sub getsetup () { #{{{
    my %plugin_info = (
        safe    => 1,
        rebuild => undef,
    );

    return (plugin => \%plugin_info);
} #}}}
|
||||
|
||||
# Balance the tags of an HTML fragment by parsing it with
# HTML::TreeBuilder and serializing the resulting nodes back out.
#
# Takes named parameters; only "content" (the markup to process) is read.
# Returns the re-serialized markup as a single string.
sub sanitize (@) { #{{{
    my %params=@_;
    my $ret = '';

    # Load the helper modules at run time. A "use" statement is executed
    # at compile time, so placing it inside eval {} can never trap a
    # missing module; "require" inside the block can.
    my $loaded = eval {
        require HTML::TreeBuilder;
        require XML::Atom::Util;
        1;
    };
    if (! $loaded) {
        error($@);
        # Only reached if error() returns: hand the content back untouched.
        # NOTE(review): error() is presumably fatal in IkiWiki -- confirm.
        return $params{content};
    }

    my $tree = HTML::TreeBuilder->new_from_content($params{content});
    my @nodes = $tree->disembowel();

    # Each node is either an object (serialized as XML) or a plain text
    # string (escaped for XML).
    foreach my $node (@nodes) {
        if (ref $node) {
            $ret .= $node->as_XML();
            # Drop the trailing newline, if any, left by serialization.
            chomp $ret;
            $node->delete();
        }
        else {
            # Called fully qualified because the module is loaded with
            # require, which does not import encode_xml into this package.
            $ret .= XML::Atom::Util::encode_xml($node);
        }
    }
    $tree->delete();
    return $ret;
} # }}}
|
||||
|
||||
1
|
|
@ -9,9 +9,9 @@ New users of aggregate should enable the `aggregateinternal => 1` option in the
|
|||
.setup file. If you don't do so, you will need to enable the [[html]] plugin
|
||||
as well as aggregate itself, since feed entries will be stored as HTML.
|
||||
|
||||
The [[meta]] and [[tag]] plugins are also recommended. The
|
||||
[[htmltidy]] plugin is suggested, since feeds can easily contain html
|
||||
problems, some of which tidy can fix.
|
||||
The [[meta]] and [[tag]] plugins are also recommended. Either the
|
||||
[[htmltidy]] or [[htmlbalance]] plugin is suggested, since feeds can easily
|
||||
contain html problems, some of which these plugins can fix.
|
||||
|
||||
You will need to run ikiwiki periodically from a cron job, passing it the
|
||||
--aggregate parameter, to make it check for new posts. Here's an example
|
||||
|
|
|
@ -0,0 +1,9 @@
|
|||
[[!template id=plugin name=htmlbalance author="Simon McVittie"]]
|
||||
[[!tag type/html]]
|
||||
|
||||
This plugin ensures that the HTML emitted by ikiwiki contains well-balanced
|
||||
HTML tags, by parsing it with HTML::TreeBuilder and re-serializing it. This
|
||||
acts as a lighter-weight alternative to [[plugins/htmltidy]]; it doesn't
|
||||
ensure validity, but it does at least ensure that formatting from a
|
||||
blog post pulled in by \[[![[ikiwiki/directive/inline]]]] doesn't
|
||||
leak into the rest of the page.
|
|
@ -7,4 +7,5 @@ emitted by ikiwiki. Besides being nicely formatted, this helps ensure that
|
|||
even if users enter suboptimal html, your wiki generates valid html.
|
||||
|
||||
Note that since tidy is an external program that is run each time a page
|
||||
is built, this plugin will slow ikiwiki down somewhat.
|
||||
is built, this plugin will slow ikiwiki down somewhat. [[plugins/htmlbalance]]
|
||||
might provide a faster alternative.
|
||||
|
|
|
@ -0,0 +1,13 @@
|
|||
#!/usr/bin/perl
|
||||
use warnings;
|
||||
use strict;
|
||||
use Test::More tests => 7;
|
||||
|
||||
BEGIN { use_ok("IkiWiki::Plugin::htmlbalance"); }

# Each entry pairs an input fragment with the markup sanitize() is
# expected to return for it.
my @cases = (
    [ "<br></br>",                          "<br />" ],
    [ "<div><p b=\"c\">hello world</div>",  "<div><p b=\"c\">hello world</p></div>" ],
    [ "<a></a></a>",                        "<a></a>" ],
    [ "<b>foo <a</b>",                      "<b>foo </b>" ],
    [ "<b> foo <a</a></b>",                 "<b> foo </b>" ],
    [ "a>",                                 "a>" ],
);

foreach my $case (@cases) {
    my ($input, $expected) = @{$case};
    is(IkiWiki::Plugin::htmlbalance::sanitize(content => $input), $expected);
}
|
Loading…
Reference in New Issue