search: Converted to use xapian-omega.

Everything is done except for the actual indexing. I plan to do incremental
indexing as pages change.
master
Joey Hess 2008-06-03 15:29:54 -04:00
parent c688863cf1
commit 8a6a5320ed
12 changed files with 169 additions and 116 deletions

View File

@ -16,6 +16,7 @@ perl -MCPAN -e 'install Bundle::IkiWiki::Extras'
=head1 CONTENTS
Search::Xapian
Authen::Passphrase
RPC::XML
File::MimeInfo

View File

@ -1,5 +1,5 @@
#!/usr/bin/perl
# hyperestraier search engine plugin
# xapian-omega search engine plugin
package IkiWiki::Plugin::search;
use warnings;
@ -7,33 +7,32 @@ use strict;
use IkiWiki 2.00;
# Plugin entry point: registers this plugin's callbacks by calling hook()
# once per hook type, passing the hook type, an id, and a code reference.
# NOTE(review): this listing comes from a diff shown without +/- markers.
# The two-line registrations with id "hyperestraier" look like the removed
# lines and the one-line registrations with id "search" like their
# replacements (note there is no "search" counterpart for the getopt hook)
# -- confirm against the real file.
sub import { #{{{
hook(type => "getopt", id => "hyperestraier",
call => \&getopt);
hook(type => "checkconfig", id => "hyperestraier",
call => \&checkconfig);
hook(type => "pagetemplate", id => "hyperestraier",
call => \&pagetemplate);
hook(type => "delete", id => "hyperestraier",
call => \&delete);
hook(type => "change", id => "hyperestraier",
call => \&change);
hook(type => "cgi", id => "hyperestraier",
call => \&cgi);
hook(type => "checkconfig", id => "search", call => \&checkconfig);
hook(type => "pagetemplate", id => "search", call => \&pagetemplate);
hook(type => "delete", id => "search", call => \&delete);
hook(type => "change", id => "search", call => \&change);
hook(type => "cgi", id => "search", call => \&cgi);
} # }}}
# getopt hook: accepts --estseek=PATH on the command line and stores the
# value in $config{estseek}.
sub getopt () { #{{{
	# Getopt::Long is loaded at run time; a load failure is reported
	# through error().
	eval q{use Getopt::Long};
	if ($@) {
		error($@);
	}

	# pass_through leaves options this hook does not know about untouched.
	Getopt::Long::Configure('pass_through');
	GetOptions("estseek=s" => \$config{estseek});
} #}}}
# checkconfig hook: verifies the settings the plugin depends on, supplies a
# default for omega_cgi, and writes the omega config file and query template
# under $config{wikistatedir}/xapian when that directory does not exist yet
# or a rebuild was requested.
sub checkconfig () { #{{{
	# url and cgiurl must both be set to a non-empty value.
	for my $setting ("url", "cgiurl") {
		next if length $config{$setting};
		error(sprintf(gettext("Must specify %s when using the search plugin"), $setting));
	}

	# Fall back to the default omega location only when the key is absent.
	$config{omega_cgi}="/usr/lib/cgi-bin/omega/omega"
		unless exists $config{omega_cgi};

	my $xapiandir=$config{wikistatedir}."/xapian";
	if (! -e $xapiandir || $config{rebuild}) {
		# omega.conf uses paths relative to the xapian state directory.
		writefile("omega.conf", $xapiandir,
			"database_dir .\n".
			"template_dir ./templates\n");
		# The omega query template is searchquery.tmpl wrapped by
		# IkiWiki::misctemplate.
		writefile("query", $xapiandir."/templates",
			IkiWiki::misctemplate(gettext("search"),
				readfile(IkiWiki::template_file("searchquery.tmpl"))));
	}
} #}}}
my $form;
@ -55,93 +54,22 @@ sub pagetemplate (@) { #{{{
} #}}}
# "delete" hook callback (registered in import()); emits a debug message
# about cleaning the search index.
# NOTE(review): this listing comes from a diff shown without +/- markers.
# The hyperestraier debug line, estcmd("purge -cl") and estcfg() look like
# the removed body, and the xapian debug line like the only statement of
# the new body -- confirm against the real file.
sub delete (@) { #{{{
debug(gettext("cleaning hyperestraier search index"));
estcmd("purge -cl");
estcfg();
debug(gettext("cleaning xapian search index"));
} #}}}
# "change" hook callback (registered in import()); receives a list in @_
# and emits a debug message about updating the search index.
# NOTE(review): this listing comes from a diff shown without +/- markers.
# The hyperestraier debug line, the estcmd("gather ...") call and estcfg()
# look like the removed body, and the xapian debug line like the only
# statement of the new body -- confirm against the real file.
sub change (@) { #{{{
debug(gettext("updating hyperestraier search index"));
# For every element of @_, look up the files rendered for its page name and
# pass each one, prefixed with destdir and UTF-8 encoded, to estcmd.
estcmd("gather -cm -bc -cl -sd",
map {
map {
Encode::encode_utf8($config{destdir}."/".$_)
} @{$renderedfiles{pagename($_)}};
} @_
);
estcfg();
debug(gettext("updating xapian search index"));
} #}}}
sub cgi ($) { #{{{
my $cgi=shift;
if (defined $cgi->param('phrase') || defined $cgi->param("navi")) {
if (defined $cgi->param('P')) {
# only works for GET requests
chdir("$config{wikistatedir}/hyperestraier") || error("chdir: $!");
exec("./".IkiWiki::basename($config{cgiurl})) || error("estseek.cgi failed");
}
} #}}}
# Set once estcfg() has run, so the files below are generated at most once
# per process.
my $configured=0;

# Generates the files the hyperestraier search CGI needs inside
# $config{wikistatedir}/hyperestraier:
#   <cgi>.tmpl  - page template produced by IkiWiki::misctemplate
#   <cgi>.conf  - config file filled in from the estseek.conf template
#   a symlink named after the basename of $config{cgiurl}, pointing at
#   $config{estseek} (default /usr/lib/estraier/estseek.cgi)
# where <cgi> is that basename with everything from its first dot removed.
# Takes no arguments. Failures are reported through error(); the write
# errors also pass a cleanup callback that removes the temporary file.
sub estcfg () { #{{{
	return if $configured;
	$configured=1;

	my $estdir="$config{wikistatedir}/hyperestraier";
	my $cgi=IkiWiki::basename($config{cgiurl});
	$cgi=~s/\..*$//; # strip from the first dot onward

	# Both files are written under a temporary ".new" name and renamed into
	# place, so a failed write never truncates or deletes the live file.
	my $newfile="$estdir/$cgi.tmpl.new";
	my $cleanup = sub { unlink($newfile) };

	open(my $tmplfh, ">:utf8", $newfile) || error("open $newfile: $!", $cleanup);
	my $page=IkiWiki::misctemplate("search",
		"<!--ESTFORM-->\n\n<!--ESTRESULT-->\n\n<!--ESTINFO-->\n\n",
		forcebaseurl => IkiWiki::dirname($config{cgiurl})."/");
	# "or", not "||": print is a list operator, so "||" would bind to the
	# text being printed and a failed print would go unnoticed.
	print {$tmplfh} $page
		or error("write $newfile: $!", $cleanup);
	close($tmplfh) || error("save $newfile: $!", $cleanup);
	rename($newfile, "$estdir/$cgi.tmpl") ||
		error("rename $newfile: $!", $cleanup);

	# $cleanup closes over $newfile, so it now removes the .conf temp file.
	$newfile="$estdir/$cgi.conf.new";
	open(my $conffh, ">", $newfile) || error("open $newfile: $!", $cleanup);
	my $template=template("estseek.conf");
	eval q{use Cwd 'abs_path'};
	error($@, $cleanup) if $@;
	$template->param(
		index => $estdir,
		tmplfile => "$estdir/$cgi.tmpl",
		destdir => abs_path($config{destdir}),
		url => $config{url},
	);
	print {$conffh} $template->output
		or error("write $newfile: $!", $cleanup);
	close($conffh) || error("save $newfile: $!", $cleanup);
	rename($newfile, "$estdir/$cgi.conf") ||
		error("rename $newfile: $!", $cleanup);

	# Replace any existing link so it always points at the configured CGI.
	$cgi="$estdir/".IkiWiki::basename($config{cgiurl});
	unlink($cgi);
	my $estseek = defined $config{estseek} ? $config{estseek} : '/usr/lib/estraier/estseek.cgi';
	symlink($estseek, $cgi) || error("symlink $estseek $cgi: $!");
} # }}}
# Runs the external "estcmd" program. The first argument is a string of
# estcmd arguments (split on whitespace); any further arguments are written
# to the child's standard input, one per line.
# NOTE(review): this listing comes from a diff shown without +/- markers.
# The chdir / OMEGA_CONFIG_FILE / CGIURL / exec($config{omega_cgi}) lines
# near the end look like the tail of the new cgi() body spliced into the
# removed estcmd() -- confirm against the real file.
sub estcmd ($;@) { #{{{
my @params=split(' ', shift);
# Always operate on the hyperestraier directory under wikistatedir.
push @params, "-cl", "$config{wikistatedir}/hyperestraier";
if (@_) {
# presumably "-" tells estcmd to read its input list from stdin -- TODO confirm
push @params, "-";
}
# Forking open: returns the child's pid in the parent and a false value in
# the child; CHILD in the parent is a pipe to the child's stdin.
my $pid=open(CHILD, "|-");
if ($pid) {
# parent
foreach (@_) {
print CHILD "$_\n";
}
# close() on a piped handle waits for the child and reports its status in $?.
close(CHILD) || print STDERR "estcmd @params exited nonzero: $?\n";
}
else {
# child
open(STDOUT, "/dev/null"); # shut it up (closing won't work)
exec("estcmd", @params) || error("can't run estcmd");
chdir("$config{wikistatedir}/xapian") || error("chdir: $!");
$ENV{OMEGA_CONFIG_FILE}="./omega.conf";
# NOTE(review): the next line ends in a comma, not a semicolon; the comma
# operator still performs the assignment before the exec that follows.
$ENV{CGIURL}=$config{cgiurl},
exec($config{omega_cgi}) || error("$config{omega_cgi} failed: $!");
}
} #}}}

1
debian/changelog vendored
View File

@ -4,6 +4,7 @@ ikiwiki (2.49) UNRELEASED; urgency=low
* ikiwiki-mass-rebuild: Don't trust $! when setting $)
* inline: The optimisation in 2.41 broke nested inlines. Detect those
and avoid overoptimising.
* search: Converted to use xapian-omega.
-- Joey Hess <joeyh@debian.org> Fri, 30 May 2008 19:08:54 -0400

2
debian/control vendored
View File

@ -14,7 +14,7 @@ Package: ikiwiki
Architecture: all
Depends: ${perl:Depends}, markdown | libtext-markdown-perl, libhtml-scrubber-perl, libhtml-template-perl, libhtml-parser-perl, liburi-perl
Recommends: gcc | c-compiler, libc6-dev | libc-dev, subversion | git-core (>= 1:1.5.0) | tla | bzr (>= 0.91) | mercurial | monotone (>= 0.38), libxml-simple-perl, libnet-openid-consumer-perl, liblwpx-paranoidagent-perl, libtimedate-perl, libcgi-formbuilder-perl (>= 3.05), libcgi-session-perl (>= 4.14-1), libmail-sendmail-perl, libauthen-passphrase-perl
Suggests: viewvc | gitweb | viewcvs, hyperestraier, librpc-xml-perl, libtext-wikiformat-perl, python, python-docutils, polygen, tidy, libxml-feed-perl, libmailtools-perl, perlmagick, libfile-mimeinfo-perl, libcrypt-ssleay-perl, liblocale-gettext-perl (>= 1.05-1), libtext-typography-perl, libtext-csv-perl, libdigest-sha1-perl, graphviz, libnet-amazon-s3-perl
Suggests: viewvc | gitweb | viewcvs, libsearch-xapian-perl, xapian-omega, librpc-xml-perl, libtext-wikiformat-perl, python, python-docutils, polygen, tidy, libxml-feed-perl, libmailtools-perl, perlmagick, libfile-mimeinfo-perl, libcrypt-ssleay-perl, liblocale-gettext-perl (>= 1.05-1), libtext-typography-perl, libtext-csv-perl, libdigest-sha1-perl, graphviz, libnet-amazon-s3-perl
Conflicts: ikiwiki-plugin-table
Replaces: ikiwiki-plugin-table
Provides: ikiwiki-plugin-table

View File

@ -158,8 +158,8 @@ Well, sorta. Rather than implementing YA history browser, it can link to
### Full text search
ikiwiki can use the [[HyperEstraier]] search engine to add powerful
full text search capabilities to your wiki.
ikiwiki can use the xapian search engine to add powerful
full text [[plugins/search]] capabilities to your wiki.
### [[w3mmode]]

View File

@ -156,9 +156,9 @@ use IkiWiki::Setup::Standard {
# base page.
#tagbase => "tag",
# For use with the search plugin if your estseek.cgi is located
# For use with the search plugin if the omega cgi is located
# somewhere else.
#estseek => "/usr/lib/estraier/estseek.cgi",
#omega_cgi => "/usr/lib/cgi-bin/omega/omega",
# For use with the openid plugin, to give an url to a page users
# can use to signup for an OpenID.

View File

@ -1,12 +1,17 @@
[[template id=plugin name=search author="[[Joey]]"]]
[[tag type/useful]]
This plugin is included in ikiwiki, but is not enabled by default. It adds
full text search to ikiwiki, using the [[HyperEstraier]] engine.
This plugin adds full text search to ikiwiki, using the
[xapian](http://xapian.org/) engine and its
[omega](http://xapian.org/docs/omega/overview.html) frontend.
It's possible to configure HyperEstraier via one of ikiwiki's
[[templates|wikitemplates]], but for most users, no configuration should be
needed aside from enabling the plugin.
Ikiwiki will handle indexing new and changed page contents, using the
[[cpan Search::Xapian]] perl modules. Note that it indexes page contents
before they are preprocessed and converted to html, as this tends to
produce less noisy search results. Also, since it only indexes page
contents, files copied by the [[rawhtml]] plugin will not be indexed, nor
will other types of data files.
This plugin has a configuration option. To change the path to estseek.cgi,
set `--estseek=/path/to/estseek.cgi`
There is one setting you may need to use in the config file. `omega_cgi`
should point to the location of the omega cgi program. The default location
is `/usr/lib/cgi-bin/omega/omega`.

View File

@ -42,3 +42,5 @@ Now I did a `rm -rf ~wiki/wiki/.ikiwiki/hyperestraier` and re-ran
`--rebuild`ing once more, I'm back to the previous error message.
--[[tschwinge]]
I guess this is fixed now that it uses xapian. :-) --[[Joey]]

View File

@ -1,3 +1,5 @@
[[done]], using xapian-omega! --[[Joey]]
After using it for a while, my feeling is that [[hyperestraier]], as used in
the [[plugins/search]] plugin, is not robust enough for ikiwiki. It doesn't
upgrade well, and it has a habit of sig-11 on certain input from time to

View File

@ -21,15 +21,14 @@ located in /usr/share/ikiwiki/templates by default.
* `inlinepage.tmpl` - Used for adding a page inline in a blog
page.
* `archivepage.tmpl` - Used for listing a page in a blog archive page.
* `estseek.conf` - Not a html template, this is actually a template for
a config file for the [[HyperEstraier]] search engine. If you like you
can read the [[HyperEstraier]] docs and configure it using this.
* `blogpost.tmpl` - Used for a form to add a post to a blog (and rss/atom links)
* `feedlink.tmpl` - Used to add rss/atom links if blogpost.tmpl is not used.
* `aggregatepost.tmpl` - Used by the [[plugins/aggregate]] plugin to create
a page for a post.
* `searchform.tmpl` - Used by the [[plugins/search]] plugin to add a search
form to wiki pages.
* `searchquery.tmpl` - This is an omega template, used by the
[[plugins/search]] plugin.
The [[plugins/pagetemplate]] plugin can allow individual pages to use a
different template than `page.tmpl`.

View File

@ -1,7 +1,5 @@
<form method="get" action="<TMPL_VAR SEARCHACTION>" id="searchform">
<div>
<input type="text" name="phrase" value="" size="16" />
<input type="hidden" name="enc" value="UTF-8" />
<input type="hidden" name="do" value="hyperestraier" />
<input type="text" name="P" value="" size="16" />
</div>
</form>

View File

@ -0,0 +1,117 @@
$set{thousand,$.}$set{decimal,.}$setmap{BN,,Any Country,uk,England,fr,France}
${
$def{PREV,
$if{$ne{$topdoc,0},<INPUT TYPE=image NAME="&lt;" ALT="&lt;"
SRC="/images/xapian-omega/prev.png" BORDER=0 HEIGHT=30 WIDTH=30>,
<IMG ALT="" SRC="/images/xapian-omega/prevoff.png" HEIGHT=30 WIDTH=30>}
}
$def{NEXT,
$if{$ne{$last,$msize},<INPUT TYPE=image NAME="&gt;" ALT="&gt;"
SRC="/images/xapian-omega/next.png" BORDER=0 HEIGHT=30 WIDTH=30>,
<IMG ALT="" SRC="/images/xapian-omega/nextoff.png" HEIGHT=30 WIDTH=30>}
}
$def{P,<INPUT TYPE=image NAME="$1" VALUE="$1" SRC="/images/xapian-omega/page-$2.png" BORDER=0$opt{a} ALT="$1">}
$def{PAGE,$if{$gt{$1,9},$if{$gt{$1,99},$P{$1,$div{$1,100}}}$P{$1,$mod{$div{$1,10},10}}}$P{$1,$mod{$1,10}}}
$def{S,<IMG SRC="/images/xapian-omega/page-$2s.png"$opt{a} ALT=$1>}
$def{SPAGE,$if{$gt{$1,9},$if{$gt{$1,99},$S{$1,$div{$1,100}}}$S{$1,$mod{$div{$1,10},10}}}$S{$1,$mod{$1,10}}}
}
$def{PREV,$if{$ne{$topdoc,0},<INPUT TYPE=submit NAME="&lt;" VALUE="Previous">}}
$def{PAGE,<INPUT TYPE=submit NAME="[" VALUE="$1">}
$def{SPAGE,<INPUT TYPE=submit NAME="[" VALUE="$1" DISABLED=disabled>}
$def{NEXT,$if{$ne{$last,$msize},<INPUT TYPE=submit NAME="&gt;" VALUE="Next">}}
<p>
<FORM NAME=P METHOD=GET
ACTION="$html{$env{CGIURL}}" TARGET="_top">
<center>
<INPUT NAME=P VALUE="$html{$query}" SIZE=65>
<INPUT TYPE=SUBMIT VALUE="Search">
<hr>
<SELECT NAME=DEFAULTOP>
<OPTION VALUE=or $if{$eq{$defaultop,or},SELECTED}>Matching any words
<OPTION VALUE=and $if{$eq{$defaultop,and},SELECTED}>Matching all words
</SELECT>
$if{$opt{topterms},
<div title="Suggested terms to add to your query"
style="text-align:left;background:#cfc;border:1px solid green;padding:2px;font:11px verdana$. arial$. helvetica$. sans-serif;">
$map{$topterms,<span style="white-space:nowrap"><INPUT TYPE=checkbox NAME=X VALUE="$prettyterm{$_}" onClick="C(this)">$prettyterm{$_}</span> }
<BR><NOSCRIPT><INPUT TYPE=hidden NAME=ADD VALUE=1></NOSCRIPT>
</div>
}
$or{$html{$error},
$if{$eq{$msize,0},
$if{$query,No documents match your query,
<hr>Searching $nice{$dbsize} documents
},
$if{$not{$msizeexact},
$nice{$add{$topdoc,1}}-$nice{$last} of about $nice{$msize} matches,
$if{$and{$eq{$last,$msize},$eq{$topdoc,0}},
All $nice{$msize} matches,
$nice{$add{$topdoc,1}}$if{$ne{$add{$topdoc,1},$last},-$nice{$last}} of exactly $nice{$msize} matches}
}
<hr>
</center>
$list{$map{$queryterms,$list{$html{$uniq{$unstem{$_}}},<b>,</b>/<b>,</b>}:&nbsp;$nice{$freq{$_}}},Term frequencies: ,$. ,}
<br><small>Search took $time seconds</small>
<table>
$hitlist{<tr><td valign=top>
${<IMG SRC="/images/xapian-omega/score-$div{$percentage,10}.png" ALT="$percentage%" HEIGHT=16 WIDTH=32>}
<div title="$percentage%" style='float:left;width:60px;height:10px;border:1px solid black;margin-top:4px;'>
<div style='width:$div{$mul{$percentage,6},10}px; height:10px; background-color: red;'>
</div></div>
<div style='float:left;margin-top:2px;font-size:x-small;'>
<span title="$html{$date{$field{modtime},%Y-%m-%d %H:%M:%S}}">
Modified:<br><b>$html{$date{$field{modtime},%Y-%m-%d}}</b></span><br>
$if{$field{language},Language: <b>$html{$field{language}}</b><br>}
$if{$field{size},<span title="$html{$field{size}} bytes">Size: <b>$html{$filesize{$field{size}}}</b></span><br>}
</div>
</td>
<td><B><A HREF="$field{url}">$html{$or{$field{caption},$field{title},$field{url},Untitled}}</A></B><BR>
<small>$highlight{$field{sample},$terms}$if{$field{sample},...}</small><br>
<A HREF="$field{url}">$html{$field{url}}</A><br>
<small>
$percentage% relevant$. matching:
<i>$list{$map{$terms,$html{$prettyterm{$_}}},$. ,</i> and <i>}</i></small>${for lynx:}<p></td></tr>}
</table>
<br><center>
${suppress next, prev, and page links if there's only one page}
$if{$ne{$lastpage,1},
$set{a,$if{$opt{pagelink_height}, HEIGHT=$opt{pagelink_height}}$if{$opt{pagelink_width}, WIDTH=$opt{pagelink_width}}}
${1-W ... X-(this)-Y ...}
$set{w,$min{3,$add{$thispage,-1}}}
$set{x,$max{$add{$opt{w},1},$add{$thispage,-3}}}
$set{y,$min{$lastpage,$add{$thispage,8}}}
$PREV
$map{$range{1,$opt{w}},$PAGE{$_}}
$if{$ne{$add{$opt{w},1},$opt{x}},...}
$map{$range{$opt{x},$add{$thispage,-1}},$PAGE{$_}}
$SPAGE{$thispage}
$map{$range{$add{$thispage,1},$opt{y}},$PAGE{$_}}
$if{$ne{$opt{y},$lastpage},...}
$NEXT
}
}}
</center><br>
$if{$dbname,<INPUT TYPE=hidden NAME=DB VALUE="$html{$dbname}">}
$if{$ne{$topdoc,0},<INPUT TYPE=hidden NAME=TOPDOC VALUE=$topdoc>}
$if{$ne{$hitsperpage,10},<INPUT TYPE=hidden NAME=HITSPERPAGE VALUE=$hitsperpage>}
$if{$fmt,<INPUT TYPE=hidden NAME=FMT VALUE="$html{$fmt}">}
$if{$cgi{COLLAPSE},<INPUT TYPE=hidden NAME=COLLAPSE VALUE="$html{$cgi{COLLAPSE}}">}
$if{$queryterms,<INPUT TYPE=hidden NAME=xP VALUE="$html{$queryterms}">}
<INPUT TYPE=hidden NAME=xDB VALUE="$html{$dbname}">
<INPUT TYPE=hidden NAME=xFILTERS VALUE="$html{$filters}">
$list{$relevants,<INPUT TYPE=hidden NAME=R VALUE=",.,">}
$if{$cgi{THRESHOLD},<INPUT TYPE=hidden NAME=THRESHOLD VALUE="$html{$cgi{THRESHOLD}}">}
</FORM>
<hr><div align=right><i><small><a href="http://www.xapian.org/">$html{$version}</a></small></i></div>