[Sugar-devel] [PATCH sugar-0.84] journal scan of external media
Tomeu Vizoso
tomeu at sugarlabs.org
Wed Sep 8 03:20:21 EDT 2010
On Wed, Sep 8, 2010 at 09:00, James Cameron <quozl at laptop.org> wrote:
> On Tue, Sep 07, 2010 at 01:18:11PM +0200, Sascha Silbe wrote:
>> Agreed. I wonder if we could somehow wrap os.walk() in a single,
>> low-priority idle task. Alternatively call find in a subprocess and
>> use gobject.io_add_watch(). Especially if we want to continue indexing
>> symlinked directories this would be a good approach (see below).
>
> I've reworked it to be as asynchronous as I can make it without moving
> into a separate thread or process.
>
> - the progress bar updates smoothly during the filesystem scan, except
> when a system call (a single os.listdir or os.stat) is held up due
> to media or scheduling delays,
>
> - the user interface is responsive during the filesystem scan,
>
> - when an I/O error retry is happening on a USB HDD, the old code
> would not respond quickly to user cancelling the view, since there
> were thousands of idle tasks to get through before the event queue
> was checked, the new code responds to the event as soon as the current
> retry times out,
>
> - normal expected I/O errors are handled and ignored.
>
> The patch below is a rewrite of the function _recurse_dir, and I've
> replaced tabs with spaces only because the indenting changes make the
> patch hard to read otherwise.
>
> I'm not asking for the patch to be accepted as is, I presume I'd be
> asked to break it down into ten different patches so that each change
> is explained in a different way to how my thought processes proceeded.
FWIW, I think this should go in a single patch because it doesn't make
sense to cherry-pick pieces of a single refactoring/rewrite.
About the tabs change, the -w flag to git diff and blame can cope with
it, but I think when someone finds tabs should just push that change
inmediately. (Though I have grepped through master and have only found
tabs in invites.py).
> I think it needs to be split into smaller functions,
+1
Regards,
Tomeu
> and it doesn't
> address the issue of relative recursive links.
>
> What I'm asking for is a review of the new code so far.
>
> The old code was recursive; by calling itself through the gobject idle
> add. Each idle task was called once.
>
> The new code is called by the idle loop until it returns False. There
> is only one idle task, not one per directory.
>
> The new code maintains a list of pending directories and files. The
> pending directory list is initialised with the mount point. The pending
> files are processed one call at a time until they are exhausted, then
> the pending directories are processed once. Eventually the lists become
> empty and the result set is ready.
>
> In effect, a pair of lists are used for synthetic recursion.
>
> diff --git a/src/jarabe/journal/model.py b/src/jarabe/journal/model.py
> index 50e8dc1..6ced07c 100644
> --- a/src/jarabe/journal/model.py
> +++ b/src/jarabe/journal/model.py
> @@ -16,10 +16,11 @@
>
> import logging
> import os
> +import errno
> from datetime import datetime
> import time
> import shutil
> -from stat import S_IFMT, S_IFDIR, S_IFREG
> +from stat import S_IFLNK, S_IFMT, S_IFDIR, S_IFREG
> import traceback
> import re
>
> @@ -258,7 +259,8 @@ class InplaceResultSet(BaseResultSet):
> BaseResultSet.__init__(self, query, cache_limit)
> self._mount_point = mount_point
> self._file_list = None
> - self._pending_directories = 0
> + self._pending_directories = []
> + self._pending_files = []
> self._stopped = False
>
> query_text = query.get('query', '')
> @@ -283,7 +285,9 @@ class InplaceResultSet(BaseResultSet):
>
> def setup(self):
> self._file_list = []
> - self._recurse_dir(self._mount_point)
> + self._pending_directories = [self._mount_point]
> + self._pending_files = []
> + gobject.idle_add(self._recurse_dir)
>
> def stop(self):
> self._stopped = True
> @@ -317,51 +321,79 @@ class InplaceResultSet(BaseResultSet):
>
> return entries, total_count
>
> - def _recurse_dir(self, dir_path):
> - if self._stopped:
> - return
> -
> - for entry in os.listdir(dir_path):
> - if entry.startswith('.'):
> - continue
> - full_path = dir_path + '/' + entry
> - try:
> - stat = os.stat(full_path)
> - if S_IFMT(stat.st_mode) == S_IFDIR:
> - self._pending_directories += 1
> - gobject.idle_add(lambda s=full_path: self._recurse_dir(s))
> -
> - elif S_IFMT(stat.st_mode) == S_IFREG:
> - add_to_list = True
> -
> - if self._regex is not None and \
> - not self._regex.match(full_path):
> - add_to_list = False
> -
> - if None not in [self._date_start, self._date_end] and \
> - (stat.st_mtime < self._date_start or
> - stat.st_mtime > self._date_end):
> - add_to_list = False
> -
> - if self._mime_types:
> - mime_type = gio.content_type_guess(filename=full_path)
> - if mime_type not in self._mime_types:
> - add_to_list = False
> -
> - if add_to_list:
> - file_info = (full_path, stat, int(stat.st_mtime))
> - self._file_list.append(file_info)
> -
> - self.progress.send(self)
> -
> - except Exception:
> - logging.error('Error reading file %r: %s' % \
> - (full_path, traceback.format_exc()))
> -
> - if self._pending_directories == 0:
> - self.setup_ready()
> - else:
> - self._pending_directories -= 1
> + def _recurse_dir(self):
> + if self._stopped:
> + return False
> +
> + if len(self._pending_files) > 0:
> + full_path = self._pending_files.pop()
> +
> + try:
> + stat = os.lstat(full_path)
> + if S_IFMT(stat.st_mode) == S_IFLNK:
> + link = os.readlink(full_path)
> + if link == '.':
> + return True
> + if link.startswith('/') and full_path.startswith(link):
> + return True
> + stat = os.stat(full_path)
> +
> + except OSError, e:
> + if e.errno not in [errno.ENOENT, errno.EPERM]:
> + logging.error('Error reading metadata of file %r: %s' %
> + (full_path, traceback.format_exc()))
> + return True
> +
> + if S_IFMT(stat.st_mode) == S_IFDIR:
> + self._pending_directories.append(full_path)
> +
> + elif S_IFMT(stat.st_mode) == S_IFREG:
> + add_to_list = True
> +
> + if self._regex is not None and \
> + not self._regex.match(full_path):
> + add_to_list = False
> +
> + if None not in [self._date_start, self._date_end] and \
> + (stat.st_mtime < self._date_start or
> + stat.st_mtime > self._date_end):
> + add_to_list = False
> +
> + if self._mime_types:
> + mime_type = gio.content_type_guess(filename=full_path)
> + if mime_type not in self._mime_types:
> + add_to_list = False
> +
> + if add_to_list:
> + file_info = (full_path, stat, int(stat.st_mtime))
> + self._file_list.append(file_info)
> +
> + self.progress.send(self)
> +
> + return True
> +
> + if len(self._pending_directories) > 0:
> + dir_path = self._pending_directories[0]
> + self._pending_directories.remove(dir_path)
> +
> + try:
> + entries = os.listdir(dir_path)
> + except OSError, e:
> + if e.errno not in [errno.ENOENT, errno.EACCES,
> + errno.EPERM, errno.ENOTDIR]:
> + logging.error('Error reading directory %r: %s' %
> + (dir_path, traceback.format_exc()))
> + return True
> +
> + for entry in entries:
> + if entry.startswith('.'):
> + continue
> + self._pending_files.append(dir_path + '/' + entry)
> + self._pending_files.reverse()
> + return True
> +
> + self.setup_ready()
> + return False
>
> def _get_file_metadata(path, stat):
> client = gconf.client_get_default()
> --
> James Cameron
> http://quozl.linux.org.au/
> _______________________________________________
> Sugar-devel mailing list
> Sugar-devel at lists.sugarlabs.org
> http://lists.sugarlabs.org/listinfo/sugar-devel
>
More information about the Sugar-devel
mailing list