search: Converted to use xapian-omega.

Everything is done except for the actual indexing. I plan to do incremental
indexing as pages change.
master
Joey Hess 2008-06-03 15:29:54 -04:00
parent c688863cf1
commit 8a6a5320ed
12 changed files with 169 additions and 116 deletions

View File

@ -16,6 +16,7 @@ perl -MCPAN -e 'install Bundle::IkiWiki::Extras'
=head1 CONTENTS =head1 CONTENTS
Search::Xapian
Authen::Passphrase Authen::Passphrase
RPC::XML RPC::XML
File::MimeInfo File::MimeInfo

View File

@ -1,5 +1,5 @@
#!/usr/bin/perl #!/usr/bin/perl
# hyperestraier search engine plugin # xapian-omega search engine plugin
package IkiWiki::Plugin::search; package IkiWiki::Plugin::search;
use warnings; use warnings;
@ -7,25 +7,11 @@ use strict;
use IkiWiki 2.00; use IkiWiki 2.00;
sub import { #{{{ sub import { #{{{
hook(type => "getopt", id => "hyperestraier", hook(type => "checkconfig", id => "search", call => \&checkconfig);
call => \&getopt); hook(type => "pagetemplate", id => "search", call => \&pagetemplate);
hook(type => "checkconfig", id => "hyperestraier", hook(type => "delete", id => "search", call => \&delete);
call => \&checkconfig); hook(type => "change", id => "search", call => \&change);
hook(type => "pagetemplate", id => "hyperestraier", hook(type => "cgi", id => "search", call => \&cgi);
call => \&pagetemplate);
hook(type => "delete", id => "hyperestraier",
call => \&delete);
hook(type => "change", id => "hyperestraier",
call => \&change);
hook(type => "cgi", id => "hyperestraier",
call => \&cgi);
} # }}}
sub getopt () { #{{{
eval q{use Getopt::Long};
error($@) if $@;
Getopt::Long::Configure('pass_through');
GetOptions("estseek=s" => \$config{estseek});
} # }}} } # }}}
sub checkconfig () { #{{{ sub checkconfig () { #{{{
@ -34,6 +20,19 @@ sub checkconfig () { #{{{
error(sprintf(gettext("Must specify %s when using the search plugin"), $required)); error(sprintf(gettext("Must specify %s when using the search plugin"), $required));
} }
} }
if (! exists $config{omega_cgi}) {
$config{omega_cgi}="/usr/lib/cgi-bin/omega/omega";
}
if (! -e $config{wikistatedir}."/xapian" || $config{rebuild}) {
writefile("omega.conf", $config{wikistatedir}."/xapian",
"database_dir .\n".
"template_dir ./templates\n");
writefile("query", $config{wikistatedir}."/xapian/templates",
IkiWiki::misctemplate(gettext("search"),
readfile(IkiWiki::template_file("searchquery.tmpl"))));
}
} #}}} } #}}}
my $form; my $form;
@ -55,93 +54,22 @@ sub pagetemplate (@) { #{{{
} #}}} } #}}}
sub delete (@) { #{{{ sub delete (@) { #{{{
debug(gettext("cleaning hyperestraier search index")); debug(gettext("cleaning xapian search index"));
estcmd("purge -cl");
estcfg();
} #}}} } #}}}
sub change (@) { #{{{ sub change (@) { #{{{
debug(gettext("updating hyperestraier search index")); debug(gettext("updating xapian search index"));
estcmd("gather -cm -bc -cl -sd",
map {
map {
Encode::encode_utf8($config{destdir}."/".$_)
} @{$renderedfiles{pagename($_)}};
} @_
);
estcfg();
} #}}} } #}}}
sub cgi ($) { #{{{ sub cgi ($) { #{{{
my $cgi=shift; my $cgi=shift;
if (defined $cgi->param('phrase') || defined $cgi->param("navi")) { if (defined $cgi->param('P')) {
# only works for GET requests # only works for GET requests
chdir("$config{wikistatedir}/hyperestraier") || error("chdir: $!"); chdir("$config{wikistatedir}/xapian") || error("chdir: $!");
exec("./".IkiWiki::basename($config{cgiurl})) || error("estseek.cgi failed"); $ENV{OMEGA_CONFIG_FILE}="./omega.conf";
} $ENV{CGIURL}=$config{cgiurl},
} #}}} exec($config{omega_cgi}) || error("$config{omega_cgi} failed: $!");
my $configured=0;
sub estcfg () { #{{{
return if $configured;
$configured=1;
my $estdir="$config{wikistatedir}/hyperestraier";
my $cgi=IkiWiki::basename($config{cgiurl});
$cgi=~s/\..*$//;
my $newfile="$estdir/$cgi.tmpl.new";
my $cleanup = sub { unlink($newfile) };
open(TEMPLATE, ">:utf8", $newfile) || error("open $newfile: $!", $cleanup);
print TEMPLATE IkiWiki::misctemplate("search",
"<!--ESTFORM-->\n\n<!--ESTRESULT-->\n\n<!--ESTINFO-->\n\n",
forcebaseurl => IkiWiki::dirname($config{cgiurl})."/") ||
error("write $newfile: $!", $cleanup);
close TEMPLATE || error("save $newfile: $!", $cleanup);
rename($newfile, "$estdir/$cgi.tmpl") ||
error("rename $newfile: $!", $cleanup);
$newfile="$estdir/$cgi.conf";
open(TEMPLATE, ">$newfile") || error("open $newfile: $!", $cleanup);
my $template=template("estseek.conf");
eval q{use Cwd 'abs_path'};
$template->param(
index => $estdir,
tmplfile => "$estdir/$cgi.tmpl",
destdir => abs_path($config{destdir}),
url => $config{url},
);
print TEMPLATE $template->output || error("write $newfile: $!", $cleanup);
close TEMPLATE || error("save $newfile: $!", $cleanup);
rename($newfile, "$estdir/$cgi.conf") ||
error("rename $newfile: $!", $cleanup);
$cgi="$estdir/".IkiWiki::basename($config{cgiurl});
unlink($cgi);
my $estseek = defined $config{estseek} ? $config{estseek} : '/usr/lib/estraier/estseek.cgi';
symlink($estseek, $cgi) || error("symlink $estseek $cgi: $!");
} # }}}
sub estcmd ($;@) { #{{{
my @params=split(' ', shift);
push @params, "-cl", "$config{wikistatedir}/hyperestraier";
if (@_) {
push @params, "-";
}
my $pid=open(CHILD, "|-");
if ($pid) {
# parent
foreach (@_) {
print CHILD "$_\n";
}
close(CHILD) || print STDERR "estcmd @params exited nonzero: $?\n";
}
else {
# child
open(STDOUT, "/dev/null"); # shut it up (closing won't work)
exec("estcmd", @params) || error("can't run estcmd");
} }
} #}}} } #}}}

1
debian/changelog vendored
View File

@ -4,6 +4,7 @@ ikiwiki (2.49) UNRELEASED; urgency=low
* ikiwiki-mass-rebuild: Don't trust $! when setting $) * ikiwiki-mass-rebuild: Don't trust $! when setting $)
* inline: The optimisation in 2.41 broke nested inlines. Detect those * inline: The optimisation in 2.41 broke nested inlines. Detect those
and avoid overoptimising. and avoid overoptimising.
* search: Converted to use xapian-omega.
-- Joey Hess <joeyh@debian.org> Fri, 30 May 2008 19:08:54 -0400 -- Joey Hess <joeyh@debian.org> Fri, 30 May 2008 19:08:54 -0400

2
debian/control vendored
View File

@ -14,7 +14,7 @@ Package: ikiwiki
Architecture: all Architecture: all
Depends: ${perl:Depends}, markdown | libtext-markdown-perl, libhtml-scrubber-perl, libhtml-template-perl, libhtml-parser-perl, liburi-perl Depends: ${perl:Depends}, markdown | libtext-markdown-perl, libhtml-scrubber-perl, libhtml-template-perl, libhtml-parser-perl, liburi-perl
Recommends: gcc | c-compiler, libc6-dev | libc-dev, subversion | git-core (>= 1:1.5.0) | tla | bzr (>= 0.91) | mercurial | monotone (>= 0.38), libxml-simple-perl, libnet-openid-consumer-perl, liblwpx-paranoidagent-perl, libtimedate-perl, libcgi-formbuilder-perl (>= 3.05), libcgi-session-perl (>= 4.14-1), libmail-sendmail-perl, libauthen-passphrase-perl Recommends: gcc | c-compiler, libc6-dev | libc-dev, subversion | git-core (>= 1:1.5.0) | tla | bzr (>= 0.91) | mercurial | monotone (>= 0.38), libxml-simple-perl, libnet-openid-consumer-perl, liblwpx-paranoidagent-perl, libtimedate-perl, libcgi-formbuilder-perl (>= 3.05), libcgi-session-perl (>= 4.14-1), libmail-sendmail-perl, libauthen-passphrase-perl
Suggests: viewvc | gitweb | viewcvs, hyperestraier, librpc-xml-perl, libtext-wikiformat-perl, python, python-docutils, polygen, tidy, libxml-feed-perl, libmailtools-perl, perlmagick, libfile-mimeinfo-perl, libcrypt-ssleay-perl, liblocale-gettext-perl (>= 1.05-1), libtext-typography-perl, libtext-csv-perl, libdigest-sha1-perl, graphviz, libnet-amazon-s3-perl Suggests: viewvc | gitweb | viewcvs, libsearch-xapian-perl, xapian-omega, librpc-xml-perl, libtext-wikiformat-perl, python, python-docutils, polygen, tidy, libxml-feed-perl, libmailtools-perl, perlmagick, libfile-mimeinfo-perl, libcrypt-ssleay-perl, liblocale-gettext-perl (>= 1.05-1), libtext-typography-perl, libtext-csv-perl, libdigest-sha1-perl, graphviz, libnet-amazon-s3-perl
Conflicts: ikiwiki-plugin-table Conflicts: ikiwiki-plugin-table
Replaces: ikiwiki-plugin-table Replaces: ikiwiki-plugin-table
Provides: ikiwiki-plugin-table Provides: ikiwiki-plugin-table

View File

@ -158,8 +158,8 @@ Well, sorta. Rather than implementing YA history browser, it can link to
### Full text search ### Full text search
ikiwiki can use the [[HyperEstraier]] search engine to add powerful ikiwiki can use the xapian search engine to add powerful
full text search capabilities to your wiki. full text [[plugins/search]] capabilities to your wiki.
### [[w3mmode]] ### [[w3mmode]]

View File

@ -156,9 +156,9 @@ use IkiWiki::Setup::Standard {
# base page. # base page.
#tagbase => "tag", #tagbase => "tag",
# For use with the search plugin if your estseek.cgi is located # For use with the search plugin if the omega cgi is located
# somewhere else. # somewhere else.
#estseek => "/usr/lib/estraier/estseek.cgi", #omega_cgi => "/usr/lib/cgi-bin/omega/omega",
# For use with the openid plugin, to give an url to a page users # For use with the openid plugin, to give an url to a page users
# can use to signup for an OpenID. # can use to signup for an OpenID.

View File

@ -1,12 +1,17 @@
[[template id=plugin name=search author="[[Joey]]"]] [[template id=plugin name=search author="[[Joey]]"]]
[[tag type/useful]] [[tag type/useful]]
This plugin is included in ikiwiki, but is not enabled by default. It adds This plugin adds full text search to ikiwiki, using the
full text search to ikiwiki, using the [[HyperEstraier]] engine. [xapian](http://xapian.org/) engine and its
[omega](http://xapian.org/docs/omega/overview.html) frontend.
It's possible to configure HyperEstraier via one of ikiwiki's Ikiwiki will handle indexing new and changed page contents, using the
[[templates|wikitemplates]], but for most users, no configuration should be [[cpan Search::Xapian]] perl modules. Note that it indexes page contents
needed aside from enabling the plugin. before they are preprocessed and converted to html, as this tends to
produce less noisy search results. Also, since it only indexes page
contents, files copied by the [[rawhtml]] plugin will not be indexed, nor
will other types of data files.
This plugin has a configuration option. To change the path to estseek.cgi, There is one setting you may need to use in the config file. `omega_cgi`
set `--estseek=/path/to/estseek.cgi` should point to the location of the omega cgi program. The default location
is `/usr/lib/cgi-bin/omega/omega`.

View File

@ -42,3 +42,5 @@ Now I did a `rm -rf ~wiki/wiki/.ikiwiki/hyperestraier` and re-ran
`--rebuild`ing once more, I'm back to the previous error message. `--rebuild`ing once more, I'm back to the previous error message.
--[[tschwinge]] --[[tschwinge]]
I guess this is fixed now that it uses xapian. :-) --[[Joey]]

View File

@ -1,3 +1,5 @@
[[done]], using xapian-omega! --[[Joey]]
After using it for a while, my feeling is that [[hyperestraier]], as used in After using it for a while, my feeling is that [[hyperestraier]], as used in
the [[plugins/search]] plugin, is not robust enough for ikiwiki. It doesn't the [[plugins/search]] plugin, is not robust enough for ikiwiki. It doesn't
upgrade well, and it has a habit of sig-11 on certain input from time to upgrade well, and it has a habit of sig-11 on certain input from time to

View File

@ -21,15 +21,14 @@ located in /usr/share/ikiwiki/templates by default.
* `inlinepage.tmpl` - Used for adding a page inline in a blog * `inlinepage.tmpl` - Used for adding a page inline in a blog
page. page.
* `archivepage.tmpl` - Used for listing a page in a blog archive page. * `archivepage.tmpl` - Used for listing a page in a blog archive page.
* `estseek.conf` - Not a html template, this is actually a template for
a config file for the [[HyperEstraier]] search engine. If you like you
can read the [[HyperEstraier]] docs and configure it using this.
* `blogpost.tmpl` - Used for a form to add a post to a blog (and rss/atom links) * `blogpost.tmpl` - Used for a form to add a post to a blog (and rss/atom links)
* `feedlink.tmpl` - Used to add rss/atom links if blogpost.tmpl is not used. * `feedlink.tmpl` - Used to add rss/atom links if blogpost.tmpl is not used.
* `aggregatepost.tmpl` - Used by the [[plugins/aggregate]] plugin to create * `aggregatepost.tmpl` - Used by the [[plugins/aggregate]] plugin to create
a page for a post. a page for a post.
* `searchform.tmpl` - Used by the [[plugins/search]] plugin to add a search * `searchform.tmpl` - Used by the [[plugins/search]] plugin to add a search
form to wiki pages. form to wiki pages.
* `searchquery.tmpl` - This is an omega template, used by the
[[plugins/search]] plugin.
The [[plugins/pagetemplate]] plugin can allow individual pages to use a The [[plugins/pagetemplate]] plugin can allow individual pages to use a
different template than `page.tmpl`. different template than `page.tmpl`.

View File

@ -1,7 +1,5 @@
<form method="get" action="<TMPL_VAR SEARCHACTION>" id="searchform"> <form method="get" action="<TMPL_VAR SEARCHACTION>" id="searchform">
<div> <div>
<input type="text" name="phrase" value="" size="16" /> <input type="text" name="P" value="" size="16" />
<input type="hidden" name="enc" value="UTF-8" />
<input type="hidden" name="do" value="hyperestraier" />
</div> </div>
</form> </form>

View File

@ -0,0 +1,117 @@
${Number formatting options (per the Omega docs these feed the number pretty-printer): the thousands separator is a comma written with the dollar-dot escape and the decimal point is a full stop. The BN map of country codes to names is not referenced in the visible part of this template.}
$set{thousand,$.}$set{decimal,.}$setmap{BN,,Any Country,uk,England,fr,France}
${The image-based pager controls in the comment block below are disabled; only the submit-button definitions after it take effect.}
${
$def{PREV,
$if{$ne{$topdoc,0},<INPUT TYPE=image NAME="&lt;" ALT="&lt;"
SRC="/images/xapian-omega/prev.png" BORDER=0 HEIGHT=30 WIDTH=30>,
<IMG ALT="" SRC="/images/xapian-omega/prevoff.png" HEIGHT=30 WIDTH=30>}
}
$def{NEXT,
$if{$ne{$last,$msize},<INPUT TYPE=image NAME="&gt;" ALT="&gt;"
SRC="/images/xapian-omega/next.png" BORDER=0 HEIGHT=30 WIDTH=30>,
<IMG ALT="" SRC="/images/xapian-omega/nextoff.png" HEIGHT=30 WIDTH=30>}
}
$def{P,<INPUT TYPE=image NAME="$1" VALUE="$1" SRC="/images/xapian-omega/page-$2.png" BORDER=0$opt{a} ALT="$1">}
$def{PAGE,$if{$gt{$1,9},$if{$gt{$1,99},$P{$1,$div{$1,100}}}$P{$1,$mod{$div{$1,10},10}}}$P{$1,$mod{$1,10}}}
$def{S,<IMG SRC="/images/xapian-omega/page-$2s.png"$opt{a} ALT=$1>}
$def{SPAGE,$if{$gt{$1,9},$if{$gt{$1,99},$S{$1,$div{$1,100}}}$S{$1,$mod{$div{$1,10},10}}}$S{$1,$mod{$1,10}}}
}
${Active pager controls built from plain submit buttons. PREV is emitted only when topdoc is non-zero and NEXT only when last differs from msize. PAGE is a numbered button and SPAGE is the same button rendered disabled; the pager below uses SPAGE for the current page.}
$def{PREV,$if{$ne{$topdoc,0},<INPUT TYPE=submit NAME="&lt;" VALUE="Previous">}}
$def{PAGE,<INPUT TYPE=submit NAME="[" VALUE="$1">}
$def{SPAGE,<INPUT TYPE=submit NAME="[" VALUE="$1" DISABLED=disabled>}
$def{NEXT,$if{$ne{$last,$msize},<INPUT TYPE=submit NAME="&gt;" VALUE="Next">}}
${Search form. It submits by GET to the URL held in the CGIURL environment variable; the query text box is named P and is pre-filled with the current query.}
<p>
<FORM NAME=P METHOD=GET
ACTION="$html{$env{CGIURL}}" TARGET="_top">
<center>
<INPUT NAME=P VALUE="$html{$query}" SIZE=65>
<INPUT TYPE=SUBMIT VALUE="Search">
<hr>
<SELECT NAME=DEFAULTOP>
<OPTION VALUE=or $if{$eq{$defaultop,or},SELECTED}>Matching any words
<OPTION VALUE=and $if{$eq{$defaultop,and},SELECTED}>Matching all words
</SELECT>
${Optional box of suggested extra query terms as checkboxes named X; rendered only when the topterms option is set.}
$if{$opt{topterms},
<div title="Suggested terms to add to your query"
style="text-align:left;background:#cfc;border:1px solid green;padding:2px;font:11px verdana$. arial$. helvetica$. sans-serif;">
$map{$topterms,<span style="white-space:nowrap"><INPUT TYPE=checkbox NAME=X VALUE="$prettyterm{$_}" onClick="C(this)">$prettyterm{$_}</span> }
<BR><NOSCRIPT><INPUT TYPE=hidden NAME=ADD VALUE=1></NOSCRIPT>
</div>
}
${Result area. An error message wins if there is one. Otherwise: with zero matches it reports that nothing matched or - when no query was given - the database size; with matches it prints the match-count summary followed by term frequencies and the hit table and the pager. This construct stays open until the double closing brace after the pager.}
$or{$html{$error},
$if{$eq{$msize,0},
$if{$query,No documents match your query,
<hr>Searching $nice{$dbsize} documents
},
$if{$not{$msizeexact},
$nice{$add{$topdoc,1}}-$nice{$last} of about $nice{$msize} matches,
$if{$and{$eq{$last,$msize},$eq{$topdoc,0}},
All $nice{$msize} matches,
$nice{$add{$topdoc,1}}$if{$ne{$add{$topdoc,1},$last},-$nice{$last}} of exactly $nice{$msize} matches}
}
<hr>
</center>
$list{$map{$queryterms,$list{$html{$uniq{$unstem{$_}}},<b>,</b>/<b>,</b>}:&nbsp;$nice{$freq{$_}}},Term frequencies: ,$. ,}
<br><small>Search took $time seconds</small>
<table>
$hitlist{<tr><td valign=top>
${<IMG SRC="/images/xapian-omega/score-$div{$percentage,10}.png" ALT="$percentage%" HEIGHT=16 WIDTH=32>}
<div title="$percentage%" style='float:left;width:60px;height:10px;border:1px solid black;margin-top:4px;'>
<div style='width:$div{$mul{$percentage,6},10}px; height:10px; background-color: red;'>
</div></div>
<div style='float:left;margin-top:2px;font-size:x-small;'>
<span title="$html{$date{$field{modtime},%Y-%m-%d %H:%M:%S}}">
Modified:<br><b>$html{$date{$field{modtime},%Y-%m-%d}}</b></span><br>
$if{$field{language},Language: <b>$html{$field{language}}</b><br>}
$if{$field{size},<span title="$html{$field{size}} bytes">Size: <b>$html{$filesize{$field{size}}}</b></span><br>}
</div>
</td>
<td><B><A HREF="$field{url}">$html{$or{$field{caption},$field{title},$field{url},Untitled}}</A></B><BR>
<small>$highlight{$field{sample},$terms}$if{$field{sample},...}</small><br>
<A HREF="$field{url}">$html{$field{url}}</A><br>
<small>
$percentage% relevant$. matching:
<i>$list{$map{$terms,$html{$prettyterm{$_}}},$. ,</i> and <i>}</i></small>${for lynx:}<p></td></tr>}
</table>
<br><center>
${suppress next, prev, and page links if there's only one page}
$if{$ne{$lastpage,1},
$set{a,$if{$opt{pagelink_height}, HEIGHT=$opt{pagelink_height}}$if{$opt{pagelink_width}, WIDTH=$opt{pagelink_width}}}
${1-W ... X-(this)-Y ...}
$set{w,$min{3,$add{$thispage,-1}}}
$set{x,$max{$add{$opt{w},1},$add{$thispage,-3}}}
$set{y,$min{$lastpage,$add{$thispage,8}}}
$PREV
$map{$range{1,$opt{w}},$PAGE{$_}}
$if{$ne{$add{$opt{w},1},$opt{x}},...}
$map{$range{$opt{x},$add{$thispage,-1}},$PAGE{$_}}
$SPAGE{$thispage}
$map{$range{$add{$thispage,1},$opt{y}},$PAGE{$_}}
$if{$ne{$opt{y},$lastpage},...}
$NEXT
}
}}
</center><br>
${Hidden form fields written into the form so that they are resubmitted with it; most are emitted only when the corresponding value is set or differs from its default.}
$if{$dbname,<INPUT TYPE=hidden NAME=DB VALUE="$html{$dbname}">}
$if{$ne{$topdoc,0},<INPUT TYPE=hidden NAME=TOPDOC VALUE=$topdoc>}
$if{$ne{$hitsperpage,10},<INPUT TYPE=hidden NAME=HITSPERPAGE VALUE=$hitsperpage>}
$if{$fmt,<INPUT TYPE=hidden NAME=FMT VALUE="$html{$fmt}">}
$if{$cgi{COLLAPSE},<INPUT TYPE=hidden NAME=COLLAPSE VALUE="$html{$cgi{COLLAPSE}}">}
$if{$queryterms,<INPUT TYPE=hidden NAME=xP VALUE="$html{$queryterms}">}
<INPUT TYPE=hidden NAME=xDB VALUE="$html{$dbname}">
<INPUT TYPE=hidden NAME=xFILTERS VALUE="$html{$filters}">
$list{$relevants,<INPUT TYPE=hidden NAME=R VALUE=",.,">}
$if{$cgi{THRESHOLD},<INPUT TYPE=hidden NAME=THRESHOLD VALUE="$html{$cgi{THRESHOLD}}">}
</FORM>
<hr><div align=right><i><small><a href="http://www.xapian.org/">$html{$version}</a></small></i></div>