[Sugar-devel] [PATCH sugar-0.84] journal scan of external media

Tomeu Vizoso tomeu at sugarlabs.org
Wed Sep 8 03:20:21 EDT 2010


On Wed, Sep 8, 2010 at 09:00, James Cameron <quozl at laptop.org> wrote:
> On Tue, Sep 07, 2010 at 01:18:11PM +0200, Sascha Silbe wrote:
>> Agreed. I wonder if we could somehow wrap os.walk() in a single,
>> low-priority idle task. Alternatively call find in a subprocess and
>> use gobject.io_add_watch(). Especially if we want to continue indexing
>> symlinked directories this would be a good approach (see below).
>
> I've reworked it to be as asynchronous as I can make it without moving
> into a separate thread or process.
>
> - the progress bar updates smoothly during the filesystem scan, except
>  when a system call (a single os.listdir or os.stat) is held up due
>  to media or scheduling delays,
>
> - the user interface is responsive during the filesystem scan,
>
> - when an I/O error retry is happening on a USB HDD, the old code
>  would not respond quickly to user cancelling the view, since there
>  were thousands of idle tasks to get through before the event queue
>  was checked, the new code responds to the event as soon as the current
>  retry times out,
>
> - normal expected I/O errors are handled and ignored.
>
> The patch below is a rewrite of the function _recurse_dir, and I've
> replaced tabs with spaces only because the indenting changes make the
> patch hard to read otherwise.
>
> I'm not asking for the patch to be accepted as is, I presume I'd be
> asked to break it down into ten different patches so that each change
> is explained in a different way to how my thought processes proceeded.

FWIW, I think this should go in a single patch because it doesn't make
sense to cherry-pick pieces of a single refactoring/rewrite.

About the tabs change, the -w flag to git diff and blame can cope with
it, but I think when someone finds tabs should just push that change
inmediately. (Though I have grepped through master and have only found
tabs in invites.py).

> I think it needs to be split into smaller functions,

+1

Regards,

Tomeu

> and it doesn't
> address the issue of relative recursive links.
>
> What I'm asking for is a review of the new code so far.
>
> The old code was recursive; by calling itself through the gobject idle
> add.  Each idle task was called once.
>
> The new code is called by the idle loop until it returns False.  There
> is only one idle task, not one per directory.
>
> The new code maintains a list of pending directories and files.  The
> pending directory list is initialised with the mount point.  The pending
> files are processed one call at a time until they are exhausted, then
> the pending directories are processed once.  Eventually the lists become
> empty and the result set is ready.
>
> In effect, a pair of lists are used for synthetic recursion.
>
> diff --git a/src/jarabe/journal/model.py b/src/jarabe/journal/model.py
> index 50e8dc1..6ced07c 100644
> --- a/src/jarabe/journal/model.py
> +++ b/src/jarabe/journal/model.py
> @@ -16,10 +16,11 @@
>
>  import logging
>  import os
> +import errno
>  from datetime import datetime
>  import time
>  import shutil
> -from stat import S_IFMT, S_IFDIR, S_IFREG
> +from stat import S_IFLNK, S_IFMT, S_IFDIR, S_IFREG
>  import traceback
>  import re
>
> @@ -258,7 +259,8 @@ class InplaceResultSet(BaseResultSet):
>         BaseResultSet.__init__(self, query, cache_limit)
>         self._mount_point = mount_point
>         self._file_list = None
> -        self._pending_directories = 0
> +        self._pending_directories = []
> +        self._pending_files = []
>         self._stopped = False
>
>         query_text = query.get('query', '')
> @@ -283,7 +285,9 @@ class InplaceResultSet(BaseResultSet):
>
>     def setup(self):
>         self._file_list = []
> -        self._recurse_dir(self._mount_point)
> +        self._pending_directories = [self._mount_point]
> +        self._pending_files = []
> +        gobject.idle_add(self._recurse_dir)
>
>     def stop(self):
>         self._stopped = True
> @@ -317,51 +321,79 @@ class InplaceResultSet(BaseResultSet):
>
>         return entries, total_count
>
> -    def _recurse_dir(self, dir_path):
> -        if self._stopped:
> -            return
> -
> -        for entry in os.listdir(dir_path):
> -            if entry.startswith('.'):
> -                continue
> -            full_path = dir_path + '/' + entry
> -            try:
> -                stat = os.stat(full_path)
> -                if S_IFMT(stat.st_mode) == S_IFDIR:
> -                    self._pending_directories += 1
> -                    gobject.idle_add(lambda s=full_path: self._recurse_dir(s))
> -
> -                elif S_IFMT(stat.st_mode) == S_IFREG:
> -                    add_to_list = True
> -
> -                    if self._regex is not None and \
> -                            not self._regex.match(full_path):
> -                        add_to_list = False
> -
> -                    if None not in [self._date_start, self._date_end] and \
> -                            (stat.st_mtime < self._date_start or
> -                             stat.st_mtime > self._date_end):
> -                        add_to_list = False
> -
> -                    if self._mime_types:
> -                        mime_type = gio.content_type_guess(filename=full_path)
> -                        if mime_type not in self._mime_types:
> -                            add_to_list = False
> -
> -                    if add_to_list:
> -                        file_info = (full_path, stat, int(stat.st_mtime))
> -                        self._file_list.append(file_info)
> -
> -                    self.progress.send(self)
> -
> -            except Exception:
> -                logging.error('Error reading file %r: %s' % \
> -                              (full_path, traceback.format_exc()))
> -
> -        if self._pending_directories == 0:
> -            self.setup_ready()
> -        else:
> -            self._pending_directories -= 1
> +    def _recurse_dir(self):
> +       if self._stopped:
> +           return False
> +
> +       if len(self._pending_files) > 0:
> +           full_path = self._pending_files.pop()
> +
> +           try:
> +               stat = os.lstat(full_path)
> +               if S_IFMT(stat.st_mode) == S_IFLNK:
> +                   link = os.readlink(full_path)
> +                   if link == '.':
> +                       return True
> +                   if link.startswith('/') and full_path.startswith(link):
> +                       return True
> +                   stat = os.stat(full_path)
> +
> +           except OSError, e:
> +               if e.errno not in [errno.ENOENT, errno.EPERM]:
> +                   logging.error('Error reading metadata of file %r: %s' %
> +                                 (full_path, traceback.format_exc()))
> +               return True
> +
> +           if S_IFMT(stat.st_mode) == S_IFDIR:
> +               self._pending_directories.append(full_path)
> +
> +           elif S_IFMT(stat.st_mode) == S_IFREG:
> +               add_to_list = True
> +
> +               if self._regex is not None and \
> +                       not self._regex.match(full_path):
> +                   add_to_list = False
> +
> +               if None not in [self._date_start, self._date_end] and \
> +                       (stat.st_mtime < self._date_start or
> +                        stat.st_mtime > self._date_end):
> +                   add_to_list = False
> +
> +               if self._mime_types:
> +                   mime_type = gio.content_type_guess(filename=full_path)
> +                   if mime_type not in self._mime_types:
> +                       add_to_list = False
> +
> +               if add_to_list:
> +                   file_info = (full_path, stat, int(stat.st_mtime))
> +                   self._file_list.append(file_info)
> +
> +               self.progress.send(self)
> +
> +           return True
> +
> +       if len(self._pending_directories) > 0:
> +           dir_path = self._pending_directories[0]
> +           self._pending_directories.remove(dir_path)
> +
> +           try:
> +               entries = os.listdir(dir_path)
> +           except OSError, e:
> +               if e.errno not in [errno.ENOENT, errno.EACCES,
> +                                  errno.EPERM, errno.ENOTDIR]:
> +                   logging.error('Error reading directory %r: %s' %
> +                                 (dir_path, traceback.format_exc()))
> +               return True
> +
> +           for entry in entries:
> +               if entry.startswith('.'):
> +                   continue
> +               self._pending_files.append(dir_path + '/' + entry)
> +           self._pending_files.reverse()
> +           return True
> +
> +       self.setup_ready()
> +       return False
>
>  def _get_file_metadata(path, stat):
>     client = gconf.client_get_default()
> --
> James Cameron
> http://quozl.linux.org.au/
> _______________________________________________
> Sugar-devel mailing list
> Sugar-devel at lists.sugarlabs.org
> http://lists.sugarlabs.org/listinfo/sugar-devel
>


More information about the Sugar-devel mailing list