[Sugar-devel] [PATCH sugar-0.84] journal scan of external media

James Cameron quozl at laptop.org
Wed Sep 8 03:00:31 EDT 2010


On Tue, Sep 07, 2010 at 01:18:11PM +0200, Sascha Silbe wrote:
> Agreed. I wonder if we could somehow wrap os.walk() in a single,
> low-priority idle task. Alternatively call find in a subprocess and
> use gobject.io_add_watch(). Especially if we want to continue indexing
> symlinked directories this would be a good approach (see below).

I've reworked it to be as asynchronous as I can make it without moving
into a separate thread or process.

- the progress bar updates smoothly during the filesystem scan, except
  when a system call (a single os.listdir or os.stat) is held up due
  to media or scheduling delays,

- the user interface is responsive during the filesystem scan,

- when an I/O error retry is happening on a USB HDD, the old code
  would not respond quickly to user cancelling the view, since there
  were thousands of idle tasks to get through before the event queue
  was checked, the new code responds to the event as soon as the current
  retry times out,

- normal expected I/O errors are handled and ignored.

The patch below is a rewrite of the function _recurse_dir, and I've
replaced tabs with spaces only because the indenting changes make the
patch hard to read otherwise.

I'm not asking for the patch to be accepted as is, I presume I'd be
asked to break it down into ten different patches so that each change
is explained in a different way to how my thought processes proceeded.
I think it needs to be split into smaller functions, and it doesn't
address the issue of relative recursive links.

What I'm asking for is a review of the new code so far.

The old code was recursive; by calling itself through the gobject idle
add.  Each idle task was called once.

The new code is called by the idle loop until it returns False.  There
is only one idle task, not one per directory.

The new code maintains a list of pending directories and files.  The
pending directory list is initialised with the mount point.  The pending
files are processed one call at a time until they are exhausted, then
the pending directories are processed once.  Eventually the lists become
empty and the result set is ready.

In effect, a pair of lists are used for synthetic recursion.

diff --git a/src/jarabe/journal/model.py b/src/jarabe/journal/model.py
index 50e8dc1..6ced07c 100644
--- a/src/jarabe/journal/model.py
+++ b/src/jarabe/journal/model.py
@@ -16,10 +16,11 @@
 
 import logging
 import os
+import errno
 from datetime import datetime
 import time
 import shutil
-from stat import S_IFMT, S_IFDIR, S_IFREG
+from stat import S_IFLNK, S_IFMT, S_IFDIR, S_IFREG
 import traceback
 import re
 
@@ -258,7 +259,8 @@ class InplaceResultSet(BaseResultSet):
         BaseResultSet.__init__(self, query, cache_limit)
         self._mount_point = mount_point
         self._file_list = None
-        self._pending_directories = 0
+        self._pending_directories = []
+        self._pending_files = []
         self._stopped = False
 
         query_text = query.get('query', '')
@@ -283,7 +285,9 @@ class InplaceResultSet(BaseResultSet):
 
     def setup(self):
         self._file_list = []
-        self._recurse_dir(self._mount_point)
+        self._pending_directories = [self._mount_point]
+        self._pending_files = []
+        gobject.idle_add(self._recurse_dir)
 
     def stop(self):
         self._stopped = True
@@ -317,51 +321,79 @@ class InplaceResultSet(BaseResultSet):
 
         return entries, total_count
 
-    def _recurse_dir(self, dir_path):
-        if self._stopped:
-            return
-
-        for entry in os.listdir(dir_path):
-            if entry.startswith('.'):
-                continue
-            full_path = dir_path + '/' + entry
-            try:
-                stat = os.stat(full_path)
-                if S_IFMT(stat.st_mode) == S_IFDIR:
-                    self._pending_directories += 1
-                    gobject.idle_add(lambda s=full_path: self._recurse_dir(s))
-
-                elif S_IFMT(stat.st_mode) == S_IFREG:
-                    add_to_list = True
-
-                    if self._regex is not None and \
-                            not self._regex.match(full_path):
-                        add_to_list = False
-
-                    if None not in [self._date_start, self._date_end] and \
-                            (stat.st_mtime < self._date_start or
-                             stat.st_mtime > self._date_end):
-                        add_to_list = False
-
-                    if self._mime_types:
-                        mime_type = gio.content_type_guess(filename=full_path)
-                        if mime_type not in self._mime_types:
-                            add_to_list = False
-
-                    if add_to_list:
-                        file_info = (full_path, stat, int(stat.st_mtime))
-                        self._file_list.append(file_info)
-
-                    self.progress.send(self)
-
-            except Exception:
-                logging.error('Error reading file %r: %s' % \
-                              (full_path, traceback.format_exc()))
-
-        if self._pending_directories == 0:
-            self.setup_ready()
-        else:
-            self._pending_directories -= 1
+    def _recurse_dir(self):
+	if self._stopped:
+	    return False
+
+	if len(self._pending_files) > 0:
+	    full_path = self._pending_files.pop()
+
+	    try:
+		stat = os.lstat(full_path)
+		if S_IFMT(stat.st_mode) == S_IFLNK:
+		    link = os.readlink(full_path)
+		    if link == '.':
+			return True
+		    if link.startswith('/') and full_path.startswith(link):
+			return True
+		    stat = os.stat(full_path)
+
+	    except OSError, e:
+		if e.errno not in [errno.ENOENT, errno.EPERM]:
+		    logging.error('Error reading metadata of file %r: %s' %
+				  (full_path, traceback.format_exc()))
+		return True
+
+	    if S_IFMT(stat.st_mode) == S_IFDIR:
+		self._pending_directories.append(full_path)
+
+	    elif S_IFMT(stat.st_mode) == S_IFREG:
+		add_to_list = True
+
+		if self._regex is not None and \
+			not self._regex.match(full_path):
+		    add_to_list = False
+
+		if None not in [self._date_start, self._date_end] and \
+			(stat.st_mtime < self._date_start or
+			 stat.st_mtime > self._date_end):
+		    add_to_list = False
+
+		if self._mime_types:
+		    mime_type = gio.content_type_guess(filename=full_path)
+		    if mime_type not in self._mime_types:
+			add_to_list = False
+
+		if add_to_list:
+		    file_info = (full_path, stat, int(stat.st_mtime))
+		    self._file_list.append(file_info)
+
+		self.progress.send(self)
+
+	    return True
+
+	if len(self._pending_directories) > 0:
+	    dir_path = self._pending_directories[0]
+	    self._pending_directories.remove(dir_path)
+
+	    try:
+		entries = os.listdir(dir_path)
+	    except OSError, e:
+		if e.errno not in [errno.ENOENT, errno.EACCES,
+				   errno.EPERM, errno.ENOTDIR]:
+		    logging.error('Error reading directory %r: %s' %
+				  (dir_path, traceback.format_exc()))
+		return True
+
+	    for entry in entries:
+		if entry.startswith('.'):
+		    continue
+		self._pending_files.append(dir_path + '/' + entry)
+	    self._pending_files.reverse()
+	    return True
+
+	self.setup_ready()
+	return False
 
 def _get_file_metadata(path, stat):
     client = gconf.client_get_default()
-- 
James Cameron
http://quozl.linux.org.au/


More information about the Sugar-devel mailing list