htmlbalance: don't compact whitespace, and set misc other options

Not compacting whitespace is the most important change: now that we run
sanitize hooks on individual posted comments in the comments plugin,
whitespace that is significant to Markdown (but not to HTML) would otherwise be lost.
(cherry picked from commit cb5aaa3cee)
master
Simon McVittie 2008-11-18 11:25:13 +00:00 committed by Joey Hess
parent 88e8d4bf8d
commit 473160c947
1 changed file with 9 additions and 1 deletion

View File

@@ -30,7 +30,15 @@ sub sanitize (@) { #{{{
my %params=@_; my %params=@_;
my $ret = ''; my $ret = '';
my $tree = HTML::TreeBuilder->new_from_content($params{content}); my $tree = HTML::TreeBuilder->new();
$tree->ignore_unknown(0);
$tree->ignore_ignorable_whitespace(0);
$tree->no_space_compacting(1);
$tree->p_strict(1);
$tree->store_comments(0);
$tree->store_declarations(0);
$tree->store_pis(0);
$tree->parse_content($params{content});
my @nodes = $tree->disembowel(); my @nodes = $tree->disembowel();
foreach my $node (@nodes) { foreach my $node (@nodes) {
if (ref $node) { if (ref $node) {