615 lines
19 KiB
Markdown
615 lines
19 KiB
Markdown
The u32 page is excellent, but I wonder if documenting the procedure here
|
|
would be worthwhile. Who knows, the remote site might disappear. But also
|
|
there are some variations on the approach that might be useful:
|
|
|
|
* using a python script and the dom library to extract the page names from
|
|
Special:Allpages (such as
|
|
<http://www.staff.ncl.ac.uk/jon.dowland/unix/docs/get_pagenames.py>)
|
|
* Or, querying the mysql back-end to get the names
|
|
* using WWW::MediaWiki for importing/exporting pages from the wiki, instead
|
|
of Special:Export
|
|
|
|
Also, some detail on converting mediawiki transclusion to ikiwiki inlines...
|
|
|
|
-- [[users/Jon]]
|
|
|
|
> "Who knows, the remote site might disappear.". Right now, it appears to
|
|
> have done just that. -- [[users/Jon]]
|
|
|
|
|
|
The iki-fast-load ruby script from the u32 page is given below:
|
|
|
|
#!/usr/bin/env ruby
|
|
|
|
# This script is called on the final sorted, de-spammed revision
|
|
# XML file.
|
|
#
|
|
# It doesn't currently check for no-op revisions... I believe
|
|
# that git-fast-load will dutifully load them even though nothing
|
|
# happened. I don't care to solve this by adding a file cache
|
|
# to this script. You can run iki-diff-next.rb to highlight any
|
|
# empty revisions that need to be removed.
|
|
#
|
|
# This turns each node into an equivalent file.
|
|
# It does not convert spaces to underscores in file names.
|
|
# This would break wikilinks.
|
|
# I suppose you could fix this with mod_speling or mod_rewrite.
|
|
#
|
|
# It replaces nodes in the Image: namespace with the files themselves.
|
|
|
|
|
|
require 'rubygems'
|
|
require 'node-callback'
|
|
require 'time'
|
|
require 'ostruct'
|
|
|
|
|
|
# Writes one git-fast-import "commit" command describing a single file
# revision.
#
# pipe - the stream to receive the git-fast-import commands (only #puts
#        is called on it).
# f    - record describing the revision. Fields read here: branch,
#        username, email, timestamp (must respond to #rfc2822), message,
#        putfrom, filename and content.
#        f.putfrom is true if this branch has existing commits on it,
#        false if not; when true a "from <branch>^0" line is emitted.
def format_git_commit(pipe, f)
  # Need to escape backslashes and double-quotes for git?
  # No, git breaks when I do this.
  # For the filename "path with \\", git sez: bad default revision 'HEAD'
  # filename = '"' + filename.gsub('\\', '\\\\\\\\').gsub('"', '\\"') + '"'

  # The count in a "data <count>" header must be the size in BYTES.
  # String#length counts characters on Ruby 1.9+, which under-reports any
  # multi-byte (e.g. UTF-8) text, so #bytesize is used instead.
  pipe.puts "commit #{f.branch}"
  pipe.puts "committer #{f.username} <#{f.email}> #{f.timestamp.rfc2822}"
  pipe.puts "data #{f.message.bytesize}\n#{f.message}\n"
  pipe.puts "from #{f.branch}^0" if f.putfrom
  pipe.puts "M 644 inline #{f.filename}"
  pipe.puts "data #{f.content.bytesize}\n#{f.content}\n"
  pipe.puts
end
|
|
|
|
> Would be nice to know where you could get "node-callbacks"... this thing is useless without it. --[[users/simonraven]]
|
|
|
|
|
|
Mediawiki.pm - A plugin which supports mediawiki format.
|
|
|
|
#!/usr/bin/perl
|
|
# By Scott Bronson. Licensed under the GPLv2+ License.
|
|
# Extends Ikiwiki to be able to handle Mediawiki markup.
|
|
#
|
|
# To use the Mediawiki Plugin:
|
|
# - Install Text::MediawikiFormat
|
|
# - Turn on prefix_directives in your setup file.
|
|
# (TODO: we probably don't need to do this anymore?)
|
|
# prefix_directives => 1,
|
|
# - Add this plugin on Ikiwiki's path (perl -V, look for @INC)
|
|
# cp mediawiki.pm something/IkiWiki/Plugin
|
|
# - And enable it in your setup file
|
|
# add_plugins => [qw{mediawiki}],
|
|
# - Finally, turn off the link plugin in setup (this is important)
|
|
# disable_plugins => [qw{link}],
|
|
# - Rebuild everything (actually, this should be automatic right?)
|
|
# - Now all files with a .mediawiki extension should be rendered properly.
|
|
|
|
|
|
package IkiWiki::Plugin::mediawiki;
|
|
|
|
use warnings;
|
|
use strict;
|
|
use IkiWiki 2.00;
|
|
use URI;
|
|
|
|
|
|
# This is a gross hack... We disable the link plugin so that our
|
|
# linkify routine is always called. Then we call the link plugin
|
|
# directly for all non-mediawiki pages. Ouch... Hopefully Ikiwiki
|
|
# will be updated soon to support multiple link plugins.
|
|
require IkiWiki::Plugin::link;
|
|
|
|
# Text::MediawikiFormat (T:MwF) is optional. Attempt to load it without
# importing anything and keep whatever error the attempt produced: an
# empty $markup_disabled means the module is available. When it is not,
# linking still works and the user just sees raw Mediawiki markup rather
# than formatted markup.
eval q{use Text::MediawikiFormat ()};
my $markup_disabled = $@;

# Work around a UTF8 bug in Text::MediawikiFormat
# (http://rt.cpan.org/Public/Bug/Display.html?id=26880) by pointing its
# uri_escape at URI::Escape's UTF-8 aware implementation.
if (! $markup_disabled) {
    # Symbolic glob assignment needs strict refs off; warnings are off
    # too, presumably to hide the "subroutine redefined" notice.
    no strict 'refs';
    no warnings;
    *{'Text::MediawikiFormat::uri_escape'} = \&URI::Escape::uri_escape_utf8;
}
|
|
|
|
my %metaheaders; # keeps track of redirects for pagetemplate.
|
|
my %tags; # keeps track of tags for pagetemplate.
|
|
|
|
|
|
# Registers this plugin's handlers with IkiWiki, all under the id
# "mediawiki". Perl calls import() when the module is use'd.
sub import {
    my @handlers = (
        [ checkconfig  => \&checkconfig  ],
        [ scan         => \&scan         ],
        [ linkify      => \&linkify      ],
        [ htmlize      => \&htmlize      ],
        [ pagetemplate => \&pagetemplate ],
    );

    foreach my $entry (@handlers) {
        my ($type, $code) = @$entry;
        hook(type => $type, id => "mediawiki", call => $code);
    }
}
|
|
|
|
|
|
# checkconfig hook. This plugin stands in for the (disabled) link plugin,
# so the call is handed to the link plugin's own checkconfig with the
# arguments untouched, and its result is passed straight back.
sub checkconfig
{
    my $delegate = \&IkiWiki::Plugin::link::checkconfig;
    return $delegate->(@_);
}
|
|
|
|
|
|
# Matches one Mediawiki-style wikilink, plus any letters glued directly
# onto its closing brackets. Captures: 1 = page, 2 = anchor, 3 = link
# text, 4 = trailing letters. Captures 2 and 3 are undef when their part
# of the link is absent.
my $link_regexp = qr{
    \[\[(?=[^!])            # opening brackets; a '!' right after them is rejected
    ([^\n\r\]#|<>]+)        # capture 1: name of the target page
    (?:
        \#                  # a literal '#' introduces the anchor
        ([^|\]]+)           # capture 2: the anchor
    )?                      # the anchor part may be absent

    (?:
        \|                  # a literal '|' introduces the link text
        ([^\]\|]*)          # capture 3: the link text, possibly empty
    )?                      # the link text part may be absent
    \]\]                    # closing brackets
    ([a-zA-Z]*)             # capture 4: letters touching the closing brackets
}x;
|
|
|
|
|
|
# Returns a copy of the given string with every space character turned
# into an underscore. False values (undef, "", "0") are handed back
# unchanged, so passing undef yields undef without any warning.
sub underscorize
{
    my ($text) = @_;
    return $text unless $text;

    (my $converted = $text) =~ tr/ /_/;
    return $converted;
}
|
|
|
|
|
|
# Normalizes whitespace: leading and trailing whitespace is removed and
# every remaining run of whitespace is collapsed to one space. No
# underscores are produced here; callers that want them pass the result
# on to underscorize(). False values (undef, "", "0") come back unchanged.
sub scrunch
{
    my ($text) = @_;
    return $text unless $text;

    $text =~ s/^\s+//;      # drop leading whitespace
    $text =~ s/\s+$//;      # drop trailing whitespace
    $text =~ s/\s+/ /g;     # squash inner runs to one space
    return $text;
}
|
|
|
|
|
|
# Translates a Mediawiki link target into an Ikiwiki path.
# Mediawiki and Ikiwiki treat relative vs. absolute links in exactly
# opposite ways, so this needs to be careful.
#
#   $_[0]  page the link appears on
#   $_[1]  link target as written; whitespace-normalized via scrunch()
#
# Returns the resolved path components joined with "/", with ".."
# components applied and empty components dropped.
sub translate_path
{
    my ($base, $raw) = @_;
    my $target = scrunch($raw);

    # Only targets beginning with "/" or ".." are resolved against the
    # linking page; everything else starts from the root.
    $base = "/" unless $target =~ /^(?:\/|\.\.)/;

    my @segments;
    foreach my $piece (split(/\//, "$base/$target")) {
        if ($piece eq '..') {
            pop @segments;
            next;
        }
        next if $piece eq "";
        push @segments, $piece;
    }

    # temporary hack working around http://ikiwiki.info/bugs/Can__39__t_create_root_page/index.html?updated
    # put this back the way it was once this bug is fixed upstream.
    # This is actually a major problem because now Mediawiki pages can't
    # link from /Git/git-svn to /git-svn. And upstream appears to be
    # uninterested in fixing this bug. :(
    # return "/" . join("/", @segments);
    return join("/", @segments);
}
|
|
|
|
|
|
# Works out the human-readable text for a wikilink.
#
#   $page      page the link appears on
#   $inlink    link target as written
#   $anchor    anchor part of the link, if any
#   $title     text after the '|'; undef when there was no pipe and ""
#              when a bare pipe was given
#   $trailing  letters that followed the closing brackets, if any
#
# Returns the text to display, with $trailing appended when defined.
sub linktext
{
    my ($page, $inlink, $anchor, $title, $trailing) = @_;

    # Drop a leading slash from the translated path before display.
    # (translate_path currently returns none, so this is a safety net.)
    my $target = translate_path($page, $inlink);
    $target =~ s#^/##;

    my $label;
    if ($title) {
        # An explicit title always wins.
        $label = IkiWiki::pagetitle($title);
    }
    else {
        # A target written with a leading slash is displayed as written.
        if ($inlink =~ /^\s*\//) {
            $target = $inlink;
        }

        $label = $target;
        $label = "$target#$anchor" if $anchor;

        if (defined $title && $title eq "") {
            # a bare pipe appeared in the link...
            # user wants to strip namespace and trailing parens.
            $label =~ s/^[A-Za-z0-9_-]*://;
            $label =~ s/\s*\(.*\)\s*$//;
        }

        # A trailing slash suppresses the leading slash
        $label =~ s#^/(.*)/$#$1#;
    }

    return defined $trailing ? $label . $trailing : $label;
}
|
|
|
|
|
|
# Maps a tag name to the page that represents it: the name is prefixed
# with "$config{tagbase}/" when a tagbase is configured (defined), and
# returned as-is otherwise.
sub tagpage ($)
{
    my ($tag) = @_;

    return defined $config{tagbase}
        ? $config{tagbase} . "/" . $tag
        : $tag;
}
|
|
|
|
|
|
# Pass a URL and optional text associated with it. This call turns
# it into fully-formatted HTML the same way Mediawiki would.
#
#   $url      the URL as matched in the page
#   $text     link text; undef for a bare URL (the URL itself is shown),
#             empty or blank for an untitled bracketed link (a number is
#             shown)
#   $counter  reference to the number used for untitled links. It should
#             point at 1 when you start parsing a new page; this call
#             increments it each time it uses a number.
#
# Returns "<a href='...' title='...'>text</a>" followed by any characters
# that were trimmed off the end of the URL.
sub generate_external_link
{
    my ($url, $text, $counter) = @_;

    # Mediawiki trims off trailing commas.
    # And apparently it does entity substitution first.
    # Since we can't, we'll fake it.

    # trim any leading and trailing whitespace
    $url =~ s/^\s+|\s+$//g;

    # A url properly terminates on > or <, but by now those appear as the
    # entities &gt; / &lt;, so special-case them: everything from the
    # first such entity onwards is moved outside the link.
    my $trailer = "";
    if ($url =~ s{(\&(?:gt|lt)\;.*)$}{}) {
        $trailer = $1;
    }

    # Trim some potential trailing chars, put them outside the link.
    if ($url =~ s{([,)]+)$}{}) {
        $trailer = $1 . $trailer;
    }

    if (defined $text) {
        # Trim BEFORE testing for emptiness so that "[http://x ]" is
        # numbered like "[http://x]" instead of producing an empty link.
        $text =~ s/^\s+|\s+$//g;
        if ($text eq "") {
            $text = "[$$counter]";
            $$counter += 1;
        }
        else {
            $text =~ s/^\|//;
        }
    }
    else {
        $text = $url;
    }

    # The attributes below are single-quoted and a URL may legitimately
    # contain an apostrophe, so escape it to keep the URL from closing
    # the attribute early.
    (my $attr = $url) =~ s/'/&#39;/g;

    return "<a href='$attr' title='$attr'>$text</a>$trailer";
}
|
|
|
|
|
|
# Called to handle links that consist only of a fragment, such as
# [[#heading]] or [[#heading|text]].
#
#   $_[0]  the fragment, including its leading '#'
#   $_[1]  optional link text; the fragment itself is shown when this is
#          undef or empty
#
# Returns an <a> element whose href is the whitespace-normalized fragment
# with spaces turned into underscores.
sub generate_fragment_link
{
    my ($fragment, $label) = @_;

    $fragment = scrunch($fragment);

    my $shown = (defined($label) && $label ne "")
        ? scrunch($label)
        : $fragment;

    my $href = underscorize($fragment);

    # For some reason Mediawiki puts blank titles on all its fragment links.
    # I don't see why we would duplicate that behavior here.
    return "<a href='$href'>$shown</a>";
}
|
|
|
|
|
|
# Handles one internal wikilink.
#
#   $page      page the link appears on
#   $inlink    link target as written (capture 1 of $link_regexp)
#   $anchor    anchor, if any
#   $title     link text after '|', if any
#   $trailing  letters that followed the closing brackets
#   $proc      callback invoked as $proc->($linkpage, $linktext, $anchor);
#              its return value becomes the replacement for the link
#
# Three kinds of links are distinguished:
#   * ".." is returned re-assembled and otherwise untouched;
#   * "Category:X" records X as a tag of $page, still calls $proc (with an
#     undefined link text) and returns "" so nothing is displayed;
#   * ":Category:X" only links to the category page, without tagging;
#   * anything else is an ordinary page link.
sub generate_internal_link
{
    my($page, $inlink, $anchor, $title, $trailing, $proc) = @_;

    # Ikiwiki's link plugin wrecks literal double brackets when this code
    # is displayed on the site, hence the string concatenation below.
    if($inlink eq '..') {
        # Mediawiki doesn't touch links like [[..#hi|ho]].
        return "[[" . $inlink . ($anchor?"#$anchor":"") .
            ($title?"|$title":"") . "]]" . $trailing;
    }

    if($inlink =~ /^ (:?) \s* Category (\s* \: \s*) ([^\]]*) $/x) {
        # Copy the captures right away so later matches cannot disturb them.
        my ($link_only, $sep, $category) = ($1, $2, $3);
        my $linkpage = IkiWiki::linkpage(translate_path($page, $category));

        if(!$link_only) {
            # Add this page to the given category but don't produce a link.
            # The key is the bare category name; pagetemplate() applies
            # tagpage() itself when it renders the tags.
            $tags{$page}{$linkpage} = 1;
            $proc->(tagpage($linkpage), undef, $anchor);
            return "";
        }

        # Leading colon: produce a link but don't add this page to the
        # given category, so %tags is deliberately left alone here.
        my $linktext = ($title ? '' : "Category$sep") .
            linktext($page, $category, $anchor, $title, $trailing);
        return $proc->(tagpage($linkpage), $linktext, $anchor);
    }

    # It's just a regular link
    my $linkpage = IkiWiki::linkpage(translate_path($page, $inlink));
    my $linktext = linktext($page, $inlink, $anchor, $title, $trailing);
    return $proc->($linkpage, $linktext, $anchor);
}
|
|
|
|
|
|
# Detects and handles a Mediawiki "#REDIRECT [[target]]" page.
#
# Takes the linkify hook parameters (page, destpage, content).
# Returns:
#   ""     when the page is being rendered into another page
#          (page ne destpage);
#   undef  when the content is not a redirect, so the caller should
#          render it normally;
#   text   otherwise: either a "Redirecting to ..." notice, after a
#          meta refresh header has been queued in %metaheaders for
#          pagetemplate(), or a "Redirect Error" message.
sub check_redirect
{
    my %params=@_;

    my $page=$params{page};
    my $destpage=$params{destpage};
    my $content=$params{content};

    return "" if $page ne $destpage;

    my ($value) = $content =~ /^ \s* \#REDIRECT \s* \[\[ ( [^\]]+ ) \]\]/x;
    if(!defined $value) {
        # this page isn't a redirect, render it normally.
        return undef;
    }

    # The rest of this function is copied from the redir clause
    # in meta::preprocess and actually handles the redirect.

    $value =~ s/^\s+|\s+$//g;

    my $safe=0;
    if ($value !~ /^\w+:\/\//) {
        # it's a local link
        my ($redir_page, $redir_anchor) = split /\#/, $value;

        add_depends($page, $redir_page);
        my $link=bestlink($page, underscorize(translate_path($page,$redir_page)));
        if (! length $link) {
            return "<b>Redirect Error:</b> <nowiki>[[$redir_page]] not found.</nowiki>";
        }

        $value=urlto($link, $page);
        $value.='#'.$redir_anchor if defined $redir_anchor;
        $safe=1;

        # redir cycle detection: follow the recorded redirects starting
        # at this page and stop if a page is visited twice.
        $pagestate{$page}{mediawiki}{redir}=$link;
        my $at=$page;
        my %seen;
        while (exists $pagestate{$at}{mediawiki}{redir}) {
            if ($seen{$at}) {
                return "<b>Redirect Error:</b> cycle found on <nowiki>[[$at]]</nowiki>";
            }
            $seen{$at}=1;
            $at=$pagestate{$at}{mediawiki}{redir};
        }
    } else {
        # it's an external link
        # encode_entities lives in HTML::Entities, which this package
        # never imported; load it and call it by its full name.
        require HTML::Entities;
        $value = HTML::Entities::encode_entities($value);
    }

    my $redir="<meta http-equiv=\"refresh\" content=\"0; URL=$value\" />";
    if (!$safe) {
        # The meta plugin this was copied from has a private scrub()
        # helper that is not available in this package. Do what that
        # helper does: run the header through the htmlscrubber plugin
        # when it is loaded, and leave it alone otherwise.
        # NOTE(review): parameter names follow htmlscrubber's sanitize
        # hook -- confirm against the installed IkiWiki version.
        if (IkiWiki::Plugin::htmlscrubber->can("sanitize")) {
            $redir = IkiWiki::Plugin::htmlscrubber::sanitize(
                content => $redir, page => $page, destpage => $destpage);
        }
    }
    push @{$metaheaders{$page}}, $redir;

    return "Redirecting to $value ...";
}
|
|
|
|
|
|
# Rewrites a string while leaving <nowiki>...</nowiki> sections alone.
#
#   $_[0]  the text, possibly containing <nowiki>...</nowiki> sections
#   $_[1]  callback; called with each stretch of text that lies outside
#          the nowiki sections, it returns the replacement for it
#
# Returns the callback results and the untouched nowiki sections
# concatenated in their original order.
sub skip_nowiki
{
    my ($content, $proc) = @_;

    my $rewritten = "";
    my $inside_nowiki = 0;

    # Because the split pattern captures, the list alternates between
    # outside text (first) and a complete nowiki section. Iterating with
    # the implicit loop variable keeps $_ localized for callbacks that
    # assign to it.
    foreach (split(/(<nowiki[^>]*>.*?<\/nowiki\s*>)/s, $content)) {
        if ($inside_nowiki) {
            $rewritten .= $_;
        }
        else {
            $rewritten .= $proc->($_);
        }
        $inside_nowiki = $inside_nowiki ? 0 : 1;
    }

    return $rewritten;
}
|
|
|
|
|
|
# linkify hook: converts all links in the page, wiki and otherwise.
#
# Pages whose type is not "mediawiki" are passed on to the link plugin.
# For mediawiki pages a #REDIRECT is handled first; otherwise every
# stretch of text outside <nowiki> sections has its external links,
# fragment-only links and internal wikilinks replaced, in that order.
sub linkify (@)
{
    my %params = @_;
    my ($page, $destpage, $content) = @params{qw(page destpage content)};

    if (pagetype($pagesources{$page}) ne 'mediawiki') {
        return IkiWiki::Plugin::link::linkify(@_);
    }

    my $redir = check_redirect(%params);
    return $redir if defined $redir;

    # Number handed to the next untitled external link on this page.
    my $counter = 1;

    # this code was copied from MediawikiFormat.pm.
    # Heavily changed because MF.pm screws up escaping when it does
    # this awful hack: $uricCheat =~ tr/://d;
    my @scheme_names = qw(http https ftp mailto gopher);
    my $alternation = join "|", map { qr/\Q$_\E/ } @scheme_names;
    my $schemes = qr/(?:$alternation)/;
    # And this is copied from URI:
    my $reserved = q(;/?@&=+$,);    # NOTE: no colon or [] !
    my $uric = quotemeta($reserved) . $URI::unreserved . "%#";

    # Turns one resolved internal link into HTML.
    my $render = sub {
        my ($linkpage, $linktext, $anchor) = @_;
        return htmllink($page, $destpage, $linkpage,
            linktext => $linktext,
            anchor => underscorize(scrunch($anchor)));
    };

    return skip_nowiki($content, sub {
        my $chunk = shift;

        # Escaping literal anchor tags here (turning the "<" of "<a"
        # into an entity) is disabled because it appears to screw up the
        # aggregate plugin. I guess we'll rely on Iki to post-sanitize
        # this sort of stuff.

        # Replace external links, http://blah or [http://blah]
        $chunk =~ s{\b($schemes:[$uric][:$uric]+)|\[($schemes:[$uric][:$uric]+)([^\]]*?)\]}{
            generate_external_link($1||$2, $3, \$counter)
        }eg;

        # Handle links that only contain fragments.
        $chunk =~ s{ \[\[ \s* (\#[^|\]'"<>&;]+) (?:\| ([^\]'"<>&;]*))? \]\] }{
            generate_fragment_link($1, $2)
        }xeg;

        # Match all internal links
        $chunk =~ s{$link_regexp}{
            generate_internal_link($page, $1, $2, $3, $4, $render);
        }eg;

        return $chunk;
    });
}
|
|
|
|
|
|
# scan hook: finds all WikiLinks in the page.
#
# Pages whose type is not "mediawiki" are passed on to the link plugin.
# For mediawiki pages, every internal link found outside <nowiki>
# sections has its target page appended to @{$links{$page}}.
sub scan (@)
{
    my %params = @_;
    my ($page, $content) = @params{qw(page content)};

    if (pagetype($pagesources{$page}) ne 'mediawiki') {
        return IkiWiki::Plugin::link::scan(@_);
    }

    # Records the link target; nothing is rendered during a scan.
    my $record = sub {
        my ($linkpage) = @_;
        push @{$links{$page}}, $linkpage;
        return undef;
    };

    return skip_nowiki($content, sub {
        my $chunk = shift;
        # Only the target page (capture 1) matters here.
        while ($chunk =~ /$link_regexp/g) {
            generate_internal_link($page, $1, '', '', '', $record);
        }
        return '';
    });
}
|
|
|
|
|
|
# htmlize hook: converts the page to HTML with Text::MediawikiFormat.
# When that module could not be loaded the content is returned as-is.
sub htmlize (@)
{
    my %params = @_;
    my $content = $params{content};

    return $content if $markup_disabled;

    # HTML elements let through by the formatter.
    my @allowed_tags = (
        # MediawikiFormat default
        qw(b big blockquote br caption center cite code dd
           div dl dt em font h1 h2 h3 h4 h5 h6 hr i li ol p
           pre rb rp rt ruby s samp small strike strong sub
           sup table td th tr tt u ul var),
        # Mediawiki Specific
        qw(nowiki),
        # Our additions
        qw(del ins),    # These should have been added all along.
        qw(span),       # Mediawiki allows span but that's rather scary...?
        qw(a),          # this is unfortunate; should handle links after rendering the page.
    );

    # Attributes let through by the formatter.
    my @allowed_attrs = (
        qw(title align lang dir width height bgcolor),
        qw(clear),              # BR
        qw(noshade),            # HR
        qw(cite),               # BLOCKQUOTE, Q
        qw(size face color),    # FONT
        # For various lists, mostly deprecated but safe
        qw(type start value compact),
        # Tables
        qw(summary width border frame rules cellspacing
           cellpadding valign char charoff colgroup col
           span abbr axis headers scope rowspan colspan),
        qw(id class name style),    # For CSS
        # Our additions
        qw(href),
    );

    # Do a little preprocessing to babysit Text::MediawikiFormat
    # If a line begins with tabs, T:MwF won't convert it into preformatted blocks.
    $content =~ s/^\t/ /mg;

    my $html = Text::MediawikiFormat::format(
        $content,
        {
            allowed_tags  => \@allowed_tags,
            allowed_attrs => \@allowed_attrs,
        },
        {
            extended => 0,
            absolute_links => 0,
            implicit_links => 0
        },
    );

    return $html;
}
|
|
|
|
|
|
# pagetemplate hook: fills in the template variables this plugin feeds.
#
#   meta        redirect headers queued by check_redirect(), duplicates
#               removed, joined with newlines
#   tags        one { link => ... } entry per tag recorded for the page
#   categories  one { category => ... } entry per tag (rss/atom templates)
#
# A variable is only set when the template asks for it and there is
# something to put in it.
sub pagetemplate (@)
{
    my %params = @_;
    my ($page, $destpage, $template) = @params{qw(page destpage template)};

    # handle metaheaders for redirects
    if (exists $metaheaders{$page} && $template->query(name => "meta")) {
        # avoid duplicate meta lines, keeping the first of each
        my (%seen, @unique);
        foreach my $header (@{$metaheaders{$page}}) {
            next if $seen{$header};
            $seen{$header} = 1;
            push @unique, $header;
        }
        $template->param(meta => join("\n", @unique));
    }

    my $has_tags = exists $tags{$page} && %{$tags{$page}};

    if ($has_tags && $template->query(name => "tags")) {
        my @tag_links = map {
            +{ link => htmllink($page, $destpage, tagpage($_), rel => "tag") }
        } sort keys %{$tags{$page}};
        $template->param(tags => \@tag_links);
    }

    # It's an rss/atom template. Add any categories.
    if ($template->query(name => "categories")) {
        if ($has_tags) {
            my @categories = map { +{ category => $_ } } sort keys %{$tags{$page}};
            $template->param(categories => \@categories);
        }
    }
}
|
|
|
|
1
|