more search improvements

master
Joey Hess 2008-06-04 00:38:40 -04:00
parent aefe5dd9cb
commit ce826411b2
5 changed files with 49 additions and 38 deletions

View File

@ -80,13 +80,14 @@ sub filter (@) { #{{{
# Remove any html from text to be indexed.
# TODO: This removes html that is in, e.g., a markdown pre,
# which should not be removed.
# which should not be removed, really.
if (! defined $scrubber) {
eval q{use HTML::Scrubber};
error($@) if $@;
$scrubber=HTML::Scrubber->new(allow => []);
if (! $@) {
$scrubber=HTML::Scrubber->new(allow => []);
}
}
my $toindex=$scrubber->scrub($params{content});
my $toindex = defined $scrubber ? $scrubber->scrub($params{content}) : $params{content};
# Take 512 characters for a sample, then extend it out
# if it stopped in the middle of a word.

8
debian/NEWS vendored
View File

@ -1,3 +1,11 @@
ikiwiki (2.49) unstable; urgency=low
The search plugin no longer uses hyperestraier. Instead, to use it you
will now need to install xapian-omega, and the Search::Xapian and
HTML::Scrubber perl modules.
-- Joey Hess <joeyh@debian.org> Wed, 04 Jun 2008 00:29:28 -0400
ikiwiki (2.48) unstable; urgency=high
If you allowed password based logins to your wiki, those passwords were

View File

@ -2,15 +2,16 @@
[[tag type/useful]]
This plugin adds full text search to ikiwiki, using the
[xapian](http://xapian.org/) engine and its
[omega](http://xapian.org/docs/omega/overview.html) frontend.
[xapian](http://xapian.org/) engine, its
[omega](http://xapian.org/docs/omega/overview.html) frontend,
and the [[cpan Search::Xapian]] perl module. (The [[cpan HTML::Scrubber]]
perl module will also be used, if available.)
Ikiwiki will handle indexing new and changed page contents, using the
[[cpan Search::Xapian]] perl modules. Note that it indexes page contents
before they are preprocessed and converted to html, as this tends to
produce less noisy search results. Also, since it only indexes page
contents, files copied by the [[rawhtml]] plugin will not be indexed, nor
will other types of data files.
Ikiwiki will handle indexing new and changed page contents. Note that it
indexes page contents before they are preprocessed and converted to html,
as this tends to produce less noisy search results. Also, since it only
indexes page contents, files copied by the [[rawhtml]] plugin will not be
indexed, nor will other types of data files.
There is one setting you may need to use in the config file. `omega_cgi`
should point to the location of the omega cgi program. The default location

View File

@ -0,0 +1,5 @@
The [[plugin/search]] plugin could use xapian terms to allow some special
searches. For example, "title:foo", or "link:somepage", or "author:foo", or
"copyright:GPL".
Reference: <http://xapian.org/docs/omega/termprefixes.html>

View File

@ -8,7 +8,7 @@ msgid ""
msgstr ""
"Project-Id-Version: PACKAGE VERSION\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2008-05-30 17:36-0400\n"
"POT-Creation-Date: 2008-06-04 00:33-0400\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <LL@li.org>\n"
@ -50,9 +50,9 @@ msgid "%s is not an editable page"
msgstr ""
#: ../IkiWiki/CGI.pm:432 ../IkiWiki/Plugin/brokenlinks.pm:24
#: ../IkiWiki/Plugin/inline.pm:265 ../IkiWiki/Plugin/opendiscussion.pm:17
#: ../IkiWiki/Plugin/inline.pm:266 ../IkiWiki/Plugin/opendiscussion.pm:17
#: ../IkiWiki/Plugin/orphans.pm:28 ../IkiWiki/Render.pm:95
#: ../IkiWiki/Render.pm:166
#: ../IkiWiki/Render.pm:162
msgid "discussion"
msgstr ""
@ -237,33 +237,33 @@ msgstr ""
msgid "failed to determine size of image %s"
msgstr ""
#: ../IkiWiki/Plugin/inline.pm:46
#: ../IkiWiki/Plugin/inline.pm:47
msgid "Must specify url to wiki with --url when using --rss or --atom"
msgstr ""
#: ../IkiWiki/Plugin/inline.pm:105
#: ../IkiWiki/Plugin/inline.pm:106
msgid "missing pages parameter"
msgstr ""
#: ../IkiWiki/Plugin/inline.pm:153
#: ../IkiWiki/Plugin/inline.pm:154
#, perl-format
msgid "unknown sort type %s"
msgstr ""
#: ../IkiWiki/Plugin/inline.pm:224
#: ../IkiWiki/Plugin/inline.pm:225
msgid "Add a new post titled:"
msgstr ""
#: ../IkiWiki/Plugin/inline.pm:240
#: ../IkiWiki/Plugin/inline.pm:241
#, perl-format
msgid "nonexistant template %s"
msgstr ""
#: ../IkiWiki/Plugin/inline.pm:273 ../IkiWiki/Render.pm:99
#: ../IkiWiki/Plugin/inline.pm:274 ../IkiWiki/Render.pm:99
msgid "Discussion"
msgstr ""
#: ../IkiWiki/Plugin/inline.pm:500
#: ../IkiWiki/Plugin/inline.pm:504
msgid "RPC::XML::Client not found, not pinging"
msgstr ""
@ -476,17 +476,13 @@ msgstr ""
msgid "(Diff truncated)"
msgstr ""
#: ../IkiWiki/Plugin/search.pm:34
#: ../IkiWiki/Plugin/search.pm:23
#, perl-format
msgid "Must specify %s when using the search plugin"
msgstr ""
#: ../IkiWiki/Plugin/search.pm:58
msgid "cleaning hyperestraier search index"
msgstr ""
#: ../IkiWiki/Plugin/search.pm:64
msgid "updating hyperestraier search index"
#: ../IkiWiki/Plugin/search.pm:36
msgid "search"
msgstr ""
#: ../IkiWiki/Plugin/shortcut.pm:18
@ -595,47 +591,47 @@ msgstr ""
msgid "getctime not implemented"
msgstr ""
#: ../IkiWiki/Render.pm:283 ../IkiWiki/Render.pm:304
#: ../IkiWiki/Render.pm:279 ../IkiWiki/Render.pm:300
#, perl-format
msgid "skipping bad filename %s"
msgstr ""
#: ../IkiWiki/Render.pm:358
#: ../IkiWiki/Render.pm:354
#, perl-format
msgid "removing old page %s"
msgstr ""
#: ../IkiWiki/Render.pm:398
#: ../IkiWiki/Render.pm:394
#, perl-format
msgid "scanning %s"
msgstr ""
#: ../IkiWiki/Render.pm:403
#: ../IkiWiki/Render.pm:399
#, perl-format
msgid "rendering %s"
msgstr ""
#: ../IkiWiki/Render.pm:424
#: ../IkiWiki/Render.pm:420
#, perl-format
msgid "rendering %s, which links to %s"
msgstr ""
#: ../IkiWiki/Render.pm:445
#: ../IkiWiki/Render.pm:441
#, perl-format
msgid "rendering %s, which depends on %s"
msgstr ""
#: ../IkiWiki/Render.pm:484
#: ../IkiWiki/Render.pm:480
#, perl-format
msgid "rendering %s, to update its backlinks"
msgstr ""
#: ../IkiWiki/Render.pm:496
#: ../IkiWiki/Render.pm:492
#, perl-format
msgid "removing %s, no longer rendered by %s"
msgstr ""
#: ../IkiWiki/Render.pm:520
#: ../IkiWiki/Render.pm:516
#, perl-format
msgid "ikiwiki: cannot render %s"
msgstr ""