[Sugar-devel] [PATCH sugar-0.84] journal scan of external media

James Cameron quozl at laptop.org
Thu Sep 9 00:39:53 EDT 2010


This revised patch handles recursive symlinks by processing each
directory only once.  Each directory is identified by its inode and
device numbers.

diff --git a/src/jarabe/journal/model.py b/src/jarabe/journal/model.py
index 50e8dc1..0bb571c 100644
--- a/src/jarabe/journal/model.py
+++ b/src/jarabe/journal/model.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2007-2008, One Laptop Per Child
+# Copyright (C) 2007-2010, One Laptop per Child
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -16,10 +16,11 @@
 
 import logging
 import os
+import errno
 from datetime import datetime
 import time
 import shutil
-from stat import S_IFMT, S_IFDIR, S_IFREG
+from stat import S_IFLNK, S_IFMT, S_IFDIR, S_IFREG
 import traceback
 import re
 
@@ -258,7 +259,9 @@ class InplaceResultSet(BaseResultSet):
         BaseResultSet.__init__(self, query, cache_limit)
         self._mount_point = mount_point
         self._file_list = None
-        self._pending_directories = 0
+        self._pending_directories = []
+        self._visited_directories = []
+        self._pending_files = []
         self._stopped = False
 
         query_text = query.get('query', '')
@@ -283,7 +286,10 @@ class InplaceResultSet(BaseResultSet):
 
     def setup(self):
         self._file_list = []
-        self._recurse_dir(self._mount_point)
+        self._pending_directories = [self._mount_point]
+        self._visited_directories = []
+        self._pending_files = []
+        gobject.idle_add(self._scan)
 
     def stop(self):
         self._stopped = True
@@ -317,51 +323,99 @@ class InplaceResultSet(BaseResultSet):
 
         return entries, total_count
 
-    def _recurse_dir(self, dir_path):
+    def _scan(self):
         if self._stopped:
-            return
+            return False
 
-        for entry in os.listdir(dir_path):
+        self.progress.send(self)
+
+        if len(self._pending_files) > 0:
+            return self._scan_a_file()
+
+        if len(self._pending_directories) > 0:
+            return self._scan_a_directory()
+
+        self.setup_ready()
+        self._visited_directories = []
+        return False
+
+    def _scan_a_file(self):
+        full_path = self._pending_files.pop(0)
+
+        try:
+            stat = os.lstat(full_path)
+        except OSError, e:
+            if e.errno != errno.ENOENT:
+                logging.exception(
+                    'Error reading metadata of file %r', full_path)
+            return True
+
+        if S_IFMT(stat.st_mode) == S_IFLNK:
+            try:
+                link = os.readlink(full_path)
+            except OSError, e:
+                logging.exception(
+                    'Error reading target of link %r', full_path)
+                return True
+
+            if link == '.':
+                return True
+            if link.startswith('/') and full_path.startswith(link):
+                return True
+
+            try:
+                stat = os.stat(full_path)
+
+            except OSError, e:
+                if e.errno != errno.ENOENT:
+                    logging.exception(
+                        'Error reading metadata of linked file %r', full_path)
+                return True
+
+        if S_IFMT(stat.st_mode) == S_IFDIR:
+            id_tuple = stat.st_ino, stat.st_dev
+            if not id_tuple in self._visited_directories:
+                self._visited_directories.append(id_tuple)
+                self._pending_directories.append(full_path)
+            return True
+
+        if S_IFMT(stat.st_mode) != S_IFREG:
+            return True
+
+        if self._regex is not None and \
+                not self._regex.match(full_path):
+            return True
+
+        if None not in [self._date_start, self._date_end] and \
+                (stat.st_mtime < self._date_start or
+                 stat.st_mtime > self._date_end):
+            return True
+
+        if self._mime_types:
+            mime_type = gio.content_type_guess(filename=full_path)
+            if mime_type not in self._mime_types:
+                return True
+
+        file_info = (full_path, stat, int(stat.st_mtime))
+        self._file_list.append(file_info)
+
+        return True
+
+    def _scan_a_directory(self):
+        dir_path = self._pending_directories.pop(0)
+
+        try:
+            entries = os.listdir(dir_path)
+        except OSError, e:
+            if e.errno not in [errno.EACCES, errno.ENOTDIR]:
+                logging.exception('Error reading directory %r', dir_path)
+            return True
+
+        for entry in entries:
             if entry.startswith('.'):
                 continue
-            full_path = dir_path + '/' + entry
-            try:
-                stat = os.stat(full_path)
-                if S_IFMT(stat.st_mode) == S_IFDIR:
-                    self._pending_directories += 1
-                    gobject.idle_add(lambda s=full_path: self._recurse_dir(s))
-
-                elif S_IFMT(stat.st_mode) == S_IFREG:
-                    add_to_list = True
-
-                    if self._regex is not None and \
-                            not self._regex.match(full_path):
-                        add_to_list = False
-
-                    if None not in [self._date_start, self._date_end] and \
-                            (stat.st_mtime < self._date_start or
-                             stat.st_mtime > self._date_end):
-                        add_to_list = False
-
-                    if self._mime_types:
-                        mime_type = gio.content_type_guess(filename=full_path)
-                        if mime_type not in self._mime_types:
-                            add_to_list = False
-
-                    if add_to_list:
-                        file_info = (full_path, stat, int(stat.st_mtime))
-                        self._file_list.append(file_info)
-
-                    self.progress.send(self)
-
-            except Exception:
-                logging.error('Error reading file %r: %s' % \
-                              (full_path, traceback.format_exc()))
-
-        if self._pending_directories == 0:
-            self.setup_ready()
-        else:
-            self._pending_directories -= 1
+            self._pending_files.append(dir_path + '/' + entry)
+        return True
 
 def _get_file_metadata(path, stat):
     client = gconf.client_get_default()

(made with git diff --patience)

-- 
James Cameron
http://quozl.linux.org.au/


More information about the Sugar-devel mailing list