From db0dea38c6923353072efb805d5a07aa31f2b5d9 Mon Sep 17 00:00:00 2001 From: mathdesc Date: Sat, 18 Aug 2012 20:07:22 -0400 Subject: [PATCH] Filecheck hacking and measures ... nasty --- .../discussion.mdwn | 114 ++++++++++++++++++ 1 file changed, 114 insertions(+) diff --git a/doc/bugs/Slow_Filecheck_attachments___34__snails_it_all__34__/discussion.mdwn b/doc/bugs/Slow_Filecheck_attachments___34__snails_it_all__34__/discussion.mdwn index 6684d79ee..d0582cbff 100644 --- a/doc/bugs/Slow_Filecheck_attachments___34__snails_it_all__34__/discussion.mdwn +++ b/doc/bugs/Slow_Filecheck_attachments___34__snails_it_all__34__/discussion.mdwn @@ -1,3 +1,4 @@ +##Foreword : Disabling of filecheck is not actually possible because btw it cause the attachment.pm to malfunction and any of pagespec that could contain a *mimetype* condition. @@ -16,3 +17,116 @@ sub import { } +---- + +## How bad is it ? + +So I tried on three pages to inline !mimetype(image/*) while I allowed attachment of mimetype(image/*) + +My profiling tests in the bug report show that most of the time is spent in the "Fallback using file" block code, +I tried to comment out that block and see how it'll perform. Obviously this is much much faster ... but is the mimetype +discovered using only *File::MimeInfo* ? + + +Dumping some strings before return to STDERR, rebuilding . This is just a [[!toggle id="code-test" text="dumpdebug adding"]] + +[[!toggleable id="code-test" text=""" +
+# Debug-instrumented MIME type pagespec check. The external "file -bi"
+# fallback is left disabled so that only File::MimeInfo::Magic is
+# exercised.
+#
+# Arguments:
+#   $page   - page name; used to look up the source file when no "file"
+#             parameter is given.
+#   $wanted - glob of the wanted MIME type, e.g. "image/*".
+#   %params - optional named parameters; "file" overrides the path that
+#             is examined.
+#
+# Returns an IkiWiki::SuccessReason when the detected type matches the
+# glob, an IkiWiki::FailReason when it does not, and an
+# IkiWiki::ErrorReason when no file could be determined.
+#
+# Every call and its outcome is traced on STDERR.
+sub match_mimetype ($$;@) {
+        my $page=shift;
+        my $wanted=shift;
+
+        my %params=@_;
+        my $file=exists $params{file} ? $params{file} : IkiWiki::srcfile($IkiWiki::pagesources{$page});
+        if (! defined $file) {
+                return IkiWiki::ErrorReason->new("file does not exist");
+        }
+
+        # Get the mime type.
+        #
+        # First, try File::Mimeinfo. This is fast, but doesn't recognise
+        # all files.
+        eval q{use File::MimeInfo::Magic};
+        my $mimeinfo_ok=! $@;
+        my $mimetype;
+        print STDERR " --- match_mimetype (".$file.")\n";
+        if ($mimeinfo_ok) {
+                # Assign to the outer variable: declaring a fresh "my" here
+                # would shadow it and throw the detected type away as soon
+                # as this block ends.
+                $mimetype=File::MimeInfo::Magic::magic($file);
+        }
+
+        # Fall back to using file, which has a more complete
+        # magic database.
+        # (Deliberately disabled for this measurement.)
+        #if (! defined $mimetype) {
+        #       open(my $file_h, "-|", "file", "-bi", $file);
+        #       $mimetype=<$file_h>;
+        #       chomp $mimetype;
+        #       close $file_h;
+        #}
+
+        # Drop any parameters such as "; charset=..." so that only the bare
+        # type is compared. A type without parameters is already usable and
+        # must not be treated as a detection failure.
+        $mimetype=~s/;.*// if defined $mimetype;
+
+        if (! defined $mimetype) {
+                # Fall back to default value.
+                $mimetype=File::MimeInfo::Magic::default($file)
+                        if $mimeinfo_ok;
+                if (! defined $mimetype) {
+                        $mimetype="unknown";
+                }
+        }
+
+        my $regexp=IkiWiki::glob2re($wanted);
+        if ($mimetype!~$regexp) {
+                print STDERR " xxx MIME unknown ($mimetype - $wanted - $regexp ) \n";
+                return IkiWiki::FailReason->new("file MIME type is $mimetype, not $wanted");
+        }
+        else {
+                print STDERR " vvv MIME found\n";
+                return IkiWiki::SuccessReason->new("file MIME type is $mimetype");
+        }
+}
+
+"""]] + +The results dumped to stderr (or to a file called, say, *mime*) look like this : +
+--- match_mimetype (/usr/share/ikiwiki/attachment/ikiwiki/jquery.fileupload-ui.js)
+ xxx MIME unknown (text/plain - image/* - (?i-xsm:^image\/.*$) )
+ --- match_mimetype (/usr/share/ikiwiki/locale/fr/directives/ikiwiki/directive/fortune.mdwn)
+ xxx MIME unknown (text/plain - image/* - (?i-xsm:^image\/.*$) )
+ --- match_mimetype (/usr/share/ikiwiki/locale/fr/basewiki/shortcuts.mdwn)
+ xxx MIME unknown (text/plain - image/* - (?i-xsm:^image\/.*$) 
+ --- match_mimetype (/usr/share/ikiwiki/smiley/smileys/alert.png)
+ xxx MIME unknown (application/octet-stream - image/* - (?i-xsm:^image\/.*$) )
+ --- match_mimetype (/usr/share/ikiwiki/attachment/ikiwiki/images/ui-bg_flat_75_ffffff_40x100.png)
+ xxx MIME unknown (application/octet-stream - image/* - (?i-xsm:^image\/.*$) 
+
+ +--- prefix marks the file being analysed
+xxx prefix marks a failure return : the MIME type is unknown, so the match fails
+vvv prefix marks a success return.
+ + +These are nasty, scary results ! Did I miss something, or is this mime-filecheck plain nuts ? + +*Question 1* : How many files have been analysed : **3055** (and that on a tiny, tiny wiki) +
grep "^ --- " mime | wc -l
+3055
+
+ +*Question 2* : How many times does it fail : *all the time* +
+ grep "^ xxx " mime | wc -l
+3055
+
+ +*Question 1bis* : Doh, by the way, how many files have been re-analysed ? **2835** OMG !! +
grep "^ --- " mime | sort -u | wc -l
+220
+
+ +## Conclusion + +- Only the system command *file -bi* works. While it **should** be easy on the CPU, it's also hard on the I/O -> VM :( +- Something nasty with the mime implementation and/or my system configuration -> Hints ? :D +- Need to cache during the rebuild : the same page need not be rechecked for its MIME type while it's locked ! + + +--mathdesc