[Sugar-devel] [PATCH sugar-0.84] journal scan of external media
James Cameron
quozl at laptop.org
Wed Sep 8 03:00:31 EDT 2010
On Tue, Sep 07, 2010 at 01:18:11PM +0200, Sascha Silbe wrote:
> Agreed. I wonder if we could somehow wrap os.walk() in a single,
> low-priority idle task. Alternatively call find in a subprocess and
> use gobject.io_add_watch(). Especially if we want to continue indexing
> symlinked directories this would be a good approach (see below).
I've reworked it to be as asynchronous as I can make it without moving
into a separate thread or process.
- the progress bar updates smoothly during the filesystem scan, except
when a system call (a single os.listdir or os.stat) is held up due
to media or scheduling delays,
- the user interface is responsive during the filesystem scan,
- when an I/O error retry is happening on a USB HDD, the old code
would not respond quickly to user cancelling the view, since there
were thousands of idle tasks to get through before the event queue
was checked, the new code responds to the event as soon as the current
retry times out,
- normal expected I/O errors are handled and ignored.
The patch below is a rewrite of the function _recurse_dir, and I've
replaced tabs with spaces only because the indenting changes make the
patch hard to read otherwise.
I'm not asking for the patch to be accepted as is, I presume I'd be
asked to break it down into ten different patches so that each change
is explained in a different way to how my thought processes proceeded.
I think it needs to be split into smaller functions, and it doesn't
address the issue of relative recursive links.
What I'm asking for is a review of the new code so far.
The old code was recursive; by calling itself through the gobject idle
add. Each idle task was called once.
The new code is called by the idle loop until it returns False. There
is only one idle task, not one per directory.
The new code maintains a list of pending directories and files. The
pending directory list is initialised with the mount point. The pending
files are processed one call at a time until they are exhausted, then
the pending directories are processed once. Eventually the lists become
empty and the result set is ready.
In effect, a pair of lists are used for synthetic recursion.
diff --git a/src/jarabe/journal/model.py b/src/jarabe/journal/model.py
index 50e8dc1..6ced07c 100644
--- a/src/jarabe/journal/model.py
+++ b/src/jarabe/journal/model.py
@@ -16,10 +16,11 @@
import logging
import os
+import errno
from datetime import datetime
import time
import shutil
-from stat import S_IFMT, S_IFDIR, S_IFREG
+from stat import S_IFLNK, S_IFMT, S_IFDIR, S_IFREG
import traceback
import re
@@ -258,7 +259,8 @@ class InplaceResultSet(BaseResultSet):
BaseResultSet.__init__(self, query, cache_limit)
self._mount_point = mount_point
self._file_list = None
- self._pending_directories = 0
+ self._pending_directories = []
+ self._pending_files = []
self._stopped = False
query_text = query.get('query', '')
@@ -283,7 +285,9 @@ class InplaceResultSet(BaseResultSet):
def setup(self):
self._file_list = []
- self._recurse_dir(self._mount_point)
+ self._pending_directories = [self._mount_point]
+ self._pending_files = []
+ gobject.idle_add(self._recurse_dir)
def stop(self):
self._stopped = True
@@ -317,51 +321,79 @@ class InplaceResultSet(BaseResultSet):
return entries, total_count
- def _recurse_dir(self, dir_path):
- if self._stopped:
- return
-
- for entry in os.listdir(dir_path):
- if entry.startswith('.'):
- continue
- full_path = dir_path + '/' + entry
- try:
- stat = os.stat(full_path)
- if S_IFMT(stat.st_mode) == S_IFDIR:
- self._pending_directories += 1
- gobject.idle_add(lambda s=full_path: self._recurse_dir(s))
-
- elif S_IFMT(stat.st_mode) == S_IFREG:
- add_to_list = True
-
- if self._regex is not None and \
- not self._regex.match(full_path):
- add_to_list = False
-
- if None not in [self._date_start, self._date_end] and \
- (stat.st_mtime < self._date_start or
- stat.st_mtime > self._date_end):
- add_to_list = False
-
- if self._mime_types:
- mime_type = gio.content_type_guess(filename=full_path)
- if mime_type not in self._mime_types:
- add_to_list = False
-
- if add_to_list:
- file_info = (full_path, stat, int(stat.st_mtime))
- self._file_list.append(file_info)
-
- self.progress.send(self)
-
- except Exception:
- logging.error('Error reading file %r: %s' % \
- (full_path, traceback.format_exc()))
-
- if self._pending_directories == 0:
- self.setup_ready()
- else:
- self._pending_directories -= 1
+ def _recurse_dir(self):
+ if self._stopped:
+ return False
+
+ if len(self._pending_files) > 0:
+ full_path = self._pending_files.pop()
+
+ try:
+ stat = os.lstat(full_path)
+ if S_IFMT(stat.st_mode) == S_IFLNK:
+ link = os.readlink(full_path)
+ if link == '.':
+ return True
+ if link.startswith('/') and full_path.startswith(link):
+ return True
+ stat = os.stat(full_path)
+
+ except OSError, e:
+ if e.errno not in [errno.ENOENT, errno.EPERM]:
+ logging.error('Error reading metadata of file %r: %s' %
+ (full_path, traceback.format_exc()))
+ return True
+
+ if S_IFMT(stat.st_mode) == S_IFDIR:
+ self._pending_directories.append(full_path)
+
+ elif S_IFMT(stat.st_mode) == S_IFREG:
+ add_to_list = True
+
+ if self._regex is not None and \
+ not self._regex.match(full_path):
+ add_to_list = False
+
+ if None not in [self._date_start, self._date_end] and \
+ (stat.st_mtime < self._date_start or
+ stat.st_mtime > self._date_end):
+ add_to_list = False
+
+ if self._mime_types:
+ mime_type = gio.content_type_guess(filename=full_path)
+ if mime_type not in self._mime_types:
+ add_to_list = False
+
+ if add_to_list:
+ file_info = (full_path, stat, int(stat.st_mtime))
+ self._file_list.append(file_info)
+
+ self.progress.send(self)
+
+ return True
+
+ if len(self._pending_directories) > 0:
+ dir_path = self._pending_directories[0]
+ self._pending_directories.remove(dir_path)
+
+ try:
+ entries = os.listdir(dir_path)
+ except OSError, e:
+ if e.errno not in [errno.ENOENT, errno.EACCES,
+ errno.EPERM, errno.ENOTDIR]:
+ logging.error('Error reading directory %r: %s' %
+ (dir_path, traceback.format_exc()))
+ return True
+
+ for entry in entries:
+ if entry.startswith('.'):
+ continue
+ self._pending_files.append(dir_path + '/' + entry)
+ self._pending_files.reverse()
+ return True
+
+ self.setup_ready()
+ return False
def _get_file_metadata(path, stat):
client = gconf.client_get_default()
--
James Cameron
http://quozl.linux.org.au/
More information about the Sugar-devel
mailing list