search: Converted to use xapian-omega.
Everything is done except for the actual indexing. I plan to do incremental indexing as pages change.
master
parent
c688863cf1
commit
8a6a5320ed
|
@ -16,6 +16,7 @@ perl -MCPAN -e 'install Bundle::IkiWiki::Extras'
|
|||
|
||||
=head1 CONTENTS
|
||||
|
||||
Search::Xapian
|
||||
Authen::Passphrase
|
||||
RPC::XML
|
||||
File::MimeInfo
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
#!/usr/bin/perl
|
||||
# hyperestraier search engine plugin
|
||||
# xapian-omega search engine plugin
|
||||
package IkiWiki::Plugin::search;
|
||||
|
||||
use warnings;
|
||||
|
@ -7,33 +7,32 @@ use strict;
|
|||
use IkiWiki 2.00;
|
||||
|
||||
sub import { #{{{
|
||||
hook(type => "getopt", id => "hyperestraier",
|
||||
call => \&getopt);
|
||||
hook(type => "checkconfig", id => "hyperestraier",
|
||||
call => \&checkconfig);
|
||||
hook(type => "pagetemplate", id => "hyperestraier",
|
||||
call => \&pagetemplate);
|
||||
hook(type => "delete", id => "hyperestraier",
|
||||
call => \&delete);
|
||||
hook(type => "change", id => "hyperestraier",
|
||||
call => \&change);
|
||||
hook(type => "cgi", id => "hyperestraier",
|
||||
call => \&cgi);
|
||||
hook(type => "checkconfig", id => "search", call => \&checkconfig);
|
||||
hook(type => "pagetemplate", id => "search", call => \&pagetemplate);
|
||||
hook(type => "delete", id => "search", call => \&delete);
|
||||
hook(type => "change", id => "search", call => \&change);
|
||||
hook(type => "cgi", id => "search", call => \&cgi);
|
||||
} # }}}
|
||||
|
||||
sub getopt () { #{{{
|
||||
eval q{use Getopt::Long};
|
||||
error($@) if $@;
|
||||
Getopt::Long::Configure('pass_through');
|
||||
GetOptions("estseek=s" => \$config{estseek});
|
||||
} #}}}
|
||||
|
||||
sub checkconfig () { #{{{
|
||||
foreach my $required (qw(url cgiurl)) {
|
||||
if (! length $config{$required}) {
|
||||
error(sprintf(gettext("Must specify %s when using the search plugin"), $required));
|
||||
}
|
||||
}
|
||||
|
||||
if (! exists $config{omega_cgi}) {
|
||||
$config{omega_cgi}="/usr/lib/cgi-bin/omega/omega";
|
||||
}
|
||||
|
||||
if (! -e $config{wikistatedir}."/xapian" || $config{rebuild}) {
|
||||
writefile("omega.conf", $config{wikistatedir}."/xapian",
|
||||
"database_dir .\n".
|
||||
"template_dir ./templates\n");
|
||||
writefile("query", $config{wikistatedir}."/xapian/templates",
|
||||
IkiWiki::misctemplate(gettext("search"),
|
||||
readfile(IkiWiki::template_file("searchquery.tmpl"))));
|
||||
}
|
||||
} #}}}
|
||||
|
||||
my $form;
|
||||
|
@ -55,93 +54,22 @@ sub pagetemplate (@) { #{{{
|
|||
} #}}}
|
||||
|
||||
sub delete (@) { #{{{
|
||||
debug(gettext("cleaning hyperestraier search index"));
|
||||
estcmd("purge -cl");
|
||||
estcfg();
|
||||
debug(gettext("cleaning xapian search index"));
|
||||
} #}}}
|
||||
|
||||
sub change (@) { #{{{
|
||||
debug(gettext("updating hyperestraier search index"));
|
||||
estcmd("gather -cm -bc -cl -sd",
|
||||
map {
|
||||
map {
|
||||
Encode::encode_utf8($config{destdir}."/".$_)
|
||||
} @{$renderedfiles{pagename($_)}};
|
||||
} @_
|
||||
);
|
||||
estcfg();
|
||||
debug(gettext("updating xapian search index"));
|
||||
} #}}}
|
||||
|
||||
sub cgi ($) { #{{{
|
||||
my $cgi=shift;
|
||||
|
||||
if (defined $cgi->param('phrase') || defined $cgi->param("navi")) {
|
||||
if (defined $cgi->param('P')) {
|
||||
# only works for GET requests
|
||||
chdir("$config{wikistatedir}/hyperestraier") || error("chdir: $!");
|
||||
exec("./".IkiWiki::basename($config{cgiurl})) || error("estseek.cgi failed");
|
||||
}
|
||||
} #}}}
|
||||
|
||||
my $configured=0;
|
||||
sub estcfg () { #{{{
|
||||
return if $configured;
|
||||
$configured=1;
|
||||
|
||||
my $estdir="$config{wikistatedir}/hyperestraier";
|
||||
my $cgi=IkiWiki::basename($config{cgiurl});
|
||||
$cgi=~s/\..*$//;
|
||||
|
||||
my $newfile="$estdir/$cgi.tmpl.new";
|
||||
my $cleanup = sub { unlink($newfile) };
|
||||
open(TEMPLATE, ">:utf8", $newfile) || error("open $newfile: $!", $cleanup);
|
||||
print TEMPLATE IkiWiki::misctemplate("search",
|
||||
"<!--ESTFORM-->\n\n<!--ESTRESULT-->\n\n<!--ESTINFO-->\n\n",
|
||||
forcebaseurl => IkiWiki::dirname($config{cgiurl})."/") ||
|
||||
error("write $newfile: $!", $cleanup);
|
||||
close TEMPLATE || error("save $newfile: $!", $cleanup);
|
||||
rename($newfile, "$estdir/$cgi.tmpl") ||
|
||||
error("rename $newfile: $!", $cleanup);
|
||||
|
||||
$newfile="$estdir/$cgi.conf";
|
||||
open(TEMPLATE, ">$newfile") || error("open $newfile: $!", $cleanup);
|
||||
my $template=template("estseek.conf");
|
||||
eval q{use Cwd 'abs_path'};
|
||||
$template->param(
|
||||
index => $estdir,
|
||||
tmplfile => "$estdir/$cgi.tmpl",
|
||||
destdir => abs_path($config{destdir}),
|
||||
url => $config{url},
|
||||
);
|
||||
print TEMPLATE $template->output || error("write $newfile: $!", $cleanup);
|
||||
close TEMPLATE || error("save $newfile: $!", $cleanup);
|
||||
rename($newfile, "$estdir/$cgi.conf") ||
|
||||
error("rename $newfile: $!", $cleanup);
|
||||
|
||||
$cgi="$estdir/".IkiWiki::basename($config{cgiurl});
|
||||
unlink($cgi);
|
||||
my $estseek = defined $config{estseek} ? $config{estseek} : '/usr/lib/estraier/estseek.cgi';
|
||||
symlink($estseek, $cgi) || error("symlink $estseek $cgi: $!");
|
||||
} # }}}
|
||||
|
||||
sub estcmd ($;@) { #{{{
|
||||
my @params=split(' ', shift);
|
||||
push @params, "-cl", "$config{wikistatedir}/hyperestraier";
|
||||
if (@_) {
|
||||
push @params, "-";
|
||||
}
|
||||
|
||||
my $pid=open(CHILD, "|-");
|
||||
if ($pid) {
|
||||
# parent
|
||||
foreach (@_) {
|
||||
print CHILD "$_\n";
|
||||
}
|
||||
close(CHILD) || print STDERR "estcmd @params exited nonzero: $?\n";
|
||||
}
|
||||
else {
|
||||
# child
|
||||
open(STDOUT, "/dev/null"); # shut it up (closing won't work)
|
||||
exec("estcmd", @params) || error("can't run estcmd");
|
||||
chdir("$config{wikistatedir}/xapian") || error("chdir: $!");
|
||||
$ENV{OMEGA_CONFIG_FILE}="./omega.conf";
|
||||
$ENV{CGIURL}=$config{cgiurl}; # exported so the omega query template can read it via $env{CGIURL}
|
||||
exec($config{omega_cgi}) || error("$config{omega_cgi} failed: $!");
|
||||
}
|
||||
} #}}}
|
||||
|
||||
|
|
|
@ -4,6 +4,7 @@ ikiwiki (2.49) UNRELEASED; urgency=low
|
|||
* ikiwiki-mass-rebuild: Don't trust $! when setting $)
|
||||
* inline: The optimisation in 2.41 broke nested inlines. Detect those
|
||||
and avoid overoptimising.
|
||||
* search: Converted to use xapian-omega.
|
||||
|
||||
-- Joey Hess <joeyh@debian.org> Fri, 30 May 2008 19:08:54 -0400
|
||||
|
||||
|
|
|
@ -14,7 +14,7 @@ Package: ikiwiki
|
|||
Architecture: all
|
||||
Depends: ${perl:Depends}, markdown | libtext-markdown-perl, libhtml-scrubber-perl, libhtml-template-perl, libhtml-parser-perl, liburi-perl
|
||||
Recommends: gcc | c-compiler, libc6-dev | libc-dev, subversion | git-core (>= 1:1.5.0) | tla | bzr (>= 0.91) | mercurial | monotone (>= 0.38), libxml-simple-perl, libnet-openid-consumer-perl, liblwpx-paranoidagent-perl, libtimedate-perl, libcgi-formbuilder-perl (>= 3.05), libcgi-session-perl (>= 4.14-1), libmail-sendmail-perl, libauthen-passphrase-perl
|
||||
Suggests: viewvc | gitweb | viewcvs, hyperestraier, librpc-xml-perl, libtext-wikiformat-perl, python, python-docutils, polygen, tidy, libxml-feed-perl, libmailtools-perl, perlmagick, libfile-mimeinfo-perl, libcrypt-ssleay-perl, liblocale-gettext-perl (>= 1.05-1), libtext-typography-perl, libtext-csv-perl, libdigest-sha1-perl, graphviz, libnet-amazon-s3-perl
|
||||
Suggests: viewvc | gitweb | viewcvs, libsearch-xapian-perl, xapian-omega, librpc-xml-perl, libtext-wikiformat-perl, python, python-docutils, polygen, tidy, libxml-feed-perl, libmailtools-perl, perlmagick, libfile-mimeinfo-perl, libcrypt-ssleay-perl, liblocale-gettext-perl (>= 1.05-1), libtext-typography-perl, libtext-csv-perl, libdigest-sha1-perl, graphviz, libnet-amazon-s3-perl
|
||||
Conflicts: ikiwiki-plugin-table
|
||||
Replaces: ikiwiki-plugin-table
|
||||
Provides: ikiwiki-plugin-table
|
||||
|
|
|
@ -158,8 +158,8 @@ Well, sorta. Rather than implementing YA history browser, it can link to
|
|||
|
||||
### Full text search
|
||||
|
||||
ikiwiki can use the [[HyperEstraier]] search engine to add powerful
|
||||
full text search capabilities to your wiki.
|
||||
ikiwiki can use the xapian search engine to add powerful
|
||||
full text [[plugins/search]] capabilities to your wiki.
|
||||
|
||||
### [[w3mmode]]
|
||||
|
||||
|
|
|
@ -156,9 +156,9 @@ use IkiWiki::Setup::Standard {
|
|||
# base page.
|
||||
#tagbase => "tag",
|
||||
|
||||
# For use with the search plugin if your estseek.cgi is located
|
||||
# For use with the search plugin if the omega cgi is located
|
||||
# somewhere else.
|
||||
#estseek => "/usr/lib/estraier/estseek.cgi",
|
||||
#omega_cgi => "/usr/lib/cgi-bin/omega/omega",
|
||||
|
||||
# For use with the openid plugin, to give an url to a page users
|
||||
# can use to signup for an OpenID.
|
||||
|
|
|
@ -1,12 +1,17 @@
|
|||
[[template id=plugin name=search author="[[Joey]]"]]
|
||||
[[tag type/useful]]
|
||||
|
||||
This plugin is included in ikiwiki, but is not enabled by default. It adds
|
||||
full text search to ikiwiki, using the [[HyperEstraier]] engine.
|
||||
This plugin adds full text search to ikiwiki, using the
|
||||
[xapian](http://xapian.org/) engine and its
|
||||
[omega](http://xapian.org/docs/omega/overview.html) frontend.
|
||||
|
||||
It's possible to configure HyperEstraier via one of ikiwiki's
|
||||
[[templates|wikitemplates]], but for most users, no configuration should be
|
||||
needed aside from enabling the plugin.
|
||||
Ikiwiki will handle indexing new and changed page contents, using the
|
||||
[[cpan Search::Xapian]] perl modules. Note that it indexes page contents
|
||||
before they are preprocessed and converted to html, as this tends to
|
||||
produce less noisy search results. Also, since it only indexes page
|
||||
contents, files copied by the [[rawhtml]] plugin will not be indexed, nor
|
||||
will other types of data files.
|
||||
|
||||
This plugin has a configuration option. To change the path to estseek.cgi,
|
||||
set `--estseek=/path/to/estseek.cgi`
|
||||
There is one setting you may need to use in the config file. `omega_cgi`
|
||||
should point to the location of the omega cgi program. The default location
|
||||
is `/usr/lib/cgi-bin/omega/omega`.
|
||||
|
|
|
@ -42,3 +42,5 @@ Now I did a `rm -rf ~wiki/wiki/.ikiwiki/hyperestraier` and re-ran
|
|||
`--rebuild`ing once more, I'm back to the previous error message.
|
||||
|
||||
--[[tschwinge]]
|
||||
|
||||
I guess this is fixed now that it uses xapian. :-) --[[Joey]]
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
[[done]], using xapian-omega! --[[Joey]]
|
||||
|
||||
After using it for a while, my feeling is that [[hyperestraier]], as used in
|
||||
the [[plugins/search]] plugin, is not robust enough for ikiwiki. It doesn't
|
||||
upgrade well, and it has a habit of sig-11 on certain input from time to
|
||||
|
|
|
@ -21,15 +21,14 @@ located in /usr/share/ikiwiki/templates by default.
|
|||
* `inlinepage.tmpl` - Used for adding a page inline in a blog
|
||||
page.
|
||||
* `archivepage.tmpl` - Used for listing a page in a blog archive page.
|
||||
* `estseek.conf` - Not a html template, this is actually a template for
|
||||
a config file for the [[HyperEstraier]] search engine. If you like you
|
||||
can read the [[HyperEstraier]] docs and configure it using this.
|
||||
* `blogpost.tmpl` - Used for a form to add a post to a blog (and rss/atom links)
|
||||
* `feedlink.tmpl` - Used to add rss/atom links if blogpost.tmpl is not used.
|
||||
* `aggregatepost.tmpl` - Used by the [[plugins/aggregate]] plugin to create
|
||||
a page for a post.
|
||||
* `searchform.tmpl` - Used by the [[plugins/search]] plugin to add a search
|
||||
form to wiki pages.
|
||||
* `searchquery.tmpl` - This is an omega template, used by the
|
||||
[[plugins/search]] plugin.
|
||||
|
||||
The [[plugins/pagetemplate]] plugin can allow individual pages to use a
|
||||
different template than `page.tmpl`.
|
||||
|
|
|
@ -1,7 +1,5 @@
|
|||
<form method="get" action="<TMPL_VAR SEARCHACTION>" id="searchform">
|
||||
<div>
|
||||
<input type="text" name="phrase" value="" size="16" />
|
||||
<input type="hidden" name="enc" value="UTF-8" />
|
||||
<input type="hidden" name="do" value="hyperestraier" />
|
||||
<input type="text" name="P" value="" size="16" />
|
||||
</div>
|
||||
</form>
|
||||
|
|
|
@ -0,0 +1,117 @@
|
|||
$set{thousand,$.}$set{decimal,.}$setmap{BN,,Any Country,uk,England,fr,France}
|
||||
${
|
||||
$def{PREV,
|
||||
$if{$ne{$topdoc,0},<INPUT TYPE=image NAME="<" ALT="<"
|
||||
SRC="/images/xapian-omega/prev.png" BORDER=0 HEIGHT=30 WIDTH=30>,
|
||||
<IMG ALT="" SRC="/images/xapian-omega/prevoff.png" HEIGHT=30 WIDTH=30>}
|
||||
}
|
||||
|
||||
$def{NEXT,
|
||||
$if{$ne{$last,$msize},<INPUT TYPE=image NAME=">" ALT=">"
|
||||
SRC="/images/xapian-omega/next.png" BORDER=0 HEIGHT=30 WIDTH=30>,
|
||||
<IMG ALT="" SRC="/images/xapian-omega/nextoff.png" HEIGHT=30 WIDTH=30>}
|
||||
}
|
||||
|
||||
$def{P,<INPUT TYPE=image NAME="$1" VALUE="$1" SRC="/images/xapian-omega/page-$2.png" BORDER=0$opt{a} ALT="$1">}
|
||||
$def{PAGE,$if{$gt{$1,9},$if{$gt{$1,99},$P{$1,$div{$1,100}}}$P{$1,$mod{$div{$1,10},10}}}$P{$1,$mod{$1,10}}}
|
||||
|
||||
$def{S,<IMG SRC="/images/xapian-omega/page-$2s.png"$opt{a} ALT=$1>}
|
||||
$def{SPAGE,$if{$gt{$1,9},$if{$gt{$1,99},$S{$1,$div{$1,100}}}$S{$1,$mod{$div{$1,10},10}}}$S{$1,$mod{$1,10}}}
|
||||
}
|
||||
|
||||
$def{PREV,$if{$ne{$topdoc,0},<INPUT TYPE=submit NAME="<" VALUE="Previous">}}
|
||||
|
||||
$def{PAGE,<INPUT TYPE=submit NAME="[" VALUE="$1">}
|
||||
|
||||
$def{SPAGE,<INPUT TYPE=submit NAME="[" VALUE="$1" DISABLED=disabled>}
|
||||
|
||||
$def{NEXT,$if{$ne{$last,$msize},<INPUT TYPE=submit NAME=">" VALUE="Next">}}
|
||||
|
||||
<p>
|
||||
|
||||
<FORM NAME=P METHOD=GET
|
||||
ACTION="$html{$env{CGIURL}}" TARGET="_top">
|
||||
<center>
|
||||
<INPUT NAME=P VALUE="$html{$query}" SIZE=65>
|
||||
<INPUT TYPE=SUBMIT VALUE="Search">
|
||||
<hr>
|
||||
<SELECT NAME=DEFAULTOP>
|
||||
<OPTION VALUE=or $if{$eq{$defaultop,or},SELECTED}>Matching any words
|
||||
<OPTION VALUE=and $if{$eq{$defaultop,and},SELECTED}>Matching all words
|
||||
</SELECT>
|
||||
$if{$opt{topterms},
|
||||
<div title="Suggested terms to add to your query"
|
||||
style="text-align:left;background:#cfc;border:1px solid green;padding:2px;font:11px verdana$. arial$. helvetica$. sans-serif;">
|
||||
$map{$topterms,<span style="white-space:nowrap"><INPUT TYPE=checkbox NAME=X VALUE="$prettyterm{$_}" onClick="C(this)">$prettyterm{$_}</span> }
|
||||
<BR><NOSCRIPT><INPUT TYPE=hidden NAME=ADD VALUE=1></NOSCRIPT>
|
||||
</div>
|
||||
}
|
||||
$or{$html{$error},
|
||||
$if{$eq{$msize,0},
|
||||
$if{$query,No documents match your query,
|
||||
<hr>Searching $nice{$dbsize} documents
|
||||
},
|
||||
$if{$not{$msizeexact},
|
||||
$nice{$add{$topdoc,1}}-$nice{$last} of about $nice{$msize} matches,
|
||||
$if{$and{$eq{$last,$msize},$eq{$topdoc,0}},
|
||||
All $nice{$msize} matches,
|
||||
$nice{$add{$topdoc,1}}$if{$ne{$add{$topdoc,1},$last},-$nice{$last}} of exactly $nice{$msize} matches}
|
||||
}
|
||||
<hr>
|
||||
</center>
|
||||
$list{$map{$queryterms,$list{$html{$uniq{$unstem{$_}}},<b>,</b>/<b>,</b>}: $nice{$freq{$_}}},Term frequencies: ,$. ,}
|
||||
<br><small>Search took $time seconds</small>
|
||||
<table>
|
||||
$hitlist{<tr><td valign=top>
|
||||
${<IMG SRC="/images/xapian-omega/score-$div{$percentage,10}.png" ALT="$percentage%" HEIGHT=16 WIDTH=32>}
|
||||
<div title="$percentage%" style='float:left;width:60px;height:10px;border:1px solid black;margin-top:4px;'>
|
||||
<div style='width:$div{$mul{$percentage,6},10}px; height:10px; background-color: red;'>
|
||||
</div></div>
|
||||
<div style='float:left;margin-top:2px;font-size:x-small;'>
|
||||
<span title="$html{$date{$field{modtime},%Y-%m-%d %H:%M:%S}}">
|
||||
Modified:<br><b>$html{$date{$field{modtime},%Y-%m-%d}}</b></span><br>
|
||||
$if{$field{language},Language: <b>$html{$field{language}}</b><br>}
|
||||
$if{$field{size},<span title="$html{$field{size}} bytes">Size: <b>$html{$filesize{$field{size}}}</b></span><br>}
|
||||
</div>
|
||||
</td>
|
||||
<td><B><A HREF="$field{url}">$html{$or{$field{caption},$field{title},$field{url},Untitled}}</A></B><BR>
|
||||
<small>$highlight{$field{sample},$terms}$if{$field{sample},...}</small><br>
|
||||
<A HREF="$field{url}">$html{$field{url}}</A><br>
|
||||
<small>
|
||||
$percentage% relevant$. matching:
|
||||
<i>$list{$map{$terms,$html{$prettyterm{$_}}},$. ,</i> and <i>}</i></small>${for lynx:}<p></td></tr>}
|
||||
</table>
|
||||
|
||||
<br><center>
|
||||
|
||||
${suppress next, prev, and page links if there's only one page}
|
||||
$if{$ne{$lastpage,1},
|
||||
$set{a,$if{$opt{pagelink_height}, HEIGHT=$opt{pagelink_height}}$if{$opt{pagelink_width}, WIDTH=$opt{pagelink_width}}}
|
||||
|
||||
${1-W ... X-(this)-Y ...}
|
||||
$set{w,$min{3,$add{$thispage,-1}}}
|
||||
$set{x,$max{$add{$opt{w},1},$add{$thispage,-3}}}
|
||||
$set{y,$min{$lastpage,$add{$thispage,8}}}
|
||||
$PREV
|
||||
$map{$range{1,$opt{w}},$PAGE{$_}}
|
||||
$if{$ne{$add{$opt{w},1},$opt{x}},...}
|
||||
$map{$range{$opt{x},$add{$thispage,-1}},$PAGE{$_}}
|
||||
$SPAGE{$thispage}
|
||||
$map{$range{$add{$thispage,1},$opt{y}},$PAGE{$_}}
|
||||
$if{$ne{$opt{y},$lastpage},...}
|
||||
$NEXT
|
||||
}
|
||||
}}
|
||||
</center><br>
|
||||
$if{$dbname,<INPUT TYPE=hidden NAME=DB VALUE="$html{$dbname}">}
|
||||
$if{$ne{$topdoc,0},<INPUT TYPE=hidden NAME=TOPDOC VALUE=$topdoc>}
|
||||
$if{$ne{$hitsperpage,10},<INPUT TYPE=hidden NAME=HITSPERPAGE VALUE=$hitsperpage>}
|
||||
$if{$fmt,<INPUT TYPE=hidden NAME=FMT VALUE="$html{$fmt}">}
|
||||
$if{$cgi{COLLAPSE},<INPUT TYPE=hidden NAME=COLLAPSE VALUE="$html{$cgi{COLLAPSE}}">}
|
||||
$if{$queryterms,<INPUT TYPE=hidden NAME=xP VALUE="$html{$queryterms}">}
|
||||
<INPUT TYPE=hidden NAME=xDB VALUE="$html{$dbname}">
|
||||
<INPUT TYPE=hidden NAME=xFILTERS VALUE="$html{$filters}">
|
||||
$list{$relevants,<INPUT TYPE=hidden NAME=R VALUE=",.,">}
|
||||
$if{$cgi{THRESHOLD},<INPUT TYPE=hidden NAME=THRESHOLD VALUE="$html{$cgi{THRESHOLD}}">}
|
||||
</FORM>
|
||||
<hr><div align=right><i><small><a href="http://www.xapian.org/">$html{$version}</a></small></i></div>
|
Loading…
Reference in New Issue