guix-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

03/03: git-download: Speed up 'git-predicate'.


From: Ludovic Courtès
Subject: 03/03: git-download: Speed up 'git-predicate'.
Date: Tue, 25 Jul 2017 17:24:28 -0400 (EDT)

civodul pushed a commit to branch master
in repository guix.

commit f135b4ae8397d2c501150d3ead3e0603e770ce3f
Author: Christopher Baines <address@hidden>
Date:   Mon Jun 19 08:14:43 2017 +0100

    git-download: Speed up 'git-predicate'.
    
    Adjust 'git-predicate' to use data structures that perform better when used
    with git repositories with a large number of files.
    
    Previously when matching either a regular file or directory, 'git-predicate'
    would search a list with a length equal to the number of files in the
    repository. As a search operation happens for roughly every file in the
    repository, this meant that the time taken to use 'git-predicate' to 
traverse
    all the files in a repository was roughly exponential with respect to the
    number of files in the repository.
    
    Now, for matching regular files or symlinks, 'git-predicate' uses a vhash
    using the inode value as the key. This should perform roughly in constant
    amount of time, instead of linear with respect to the number of files in the
    repository.
    
    For matching directories, 'git-predicate' now uses a tree structure stored 
in
    association lists. To check if a directory is in the tree, the tree is
    traversed from the root. The time complexity of this depends on the shape of
    the tree, but it should be an improvement on searching through the list of 
all
    files.
    
    * guix/git-download.scm (files->directory-tree, directory-in-tree?): New
    procedures.
    (git-predicate): Compute DIRECTORY-TREE.  Turn INODES into a vhash.
    Adjust body of lambda accordingly.
    
    Co-authored-by: Ludovic Courtès <address@hidden>
---
 guix/git-download.scm | 95 ++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 68 insertions(+), 27 deletions(-)

diff --git a/guix/git-download.scm b/guix/git-download.scm
index 3168355..5019a3e 100644
--- a/guix/git-download.scm
+++ b/guix/git-download.scm
@@ -1,6 +1,7 @@
 ;;; GNU Guix --- Functional package management for GNU
-;;; Copyright © 2014, 2015, 2016 Ludovic Courtès <address@hidden>
+;;; Copyright © 2014, 2015, 2016, 2017 Ludovic Courtès <address@hidden>
 ;;; Copyright © 2017 Mathieu Lirzin <address@hidden>
+;;; Copyright © 2017 Christopher Baines <address@hidden>
 ;;;
 ;;; This file is part of GNU Guix.
 ;;;
@@ -28,6 +29,7 @@
   #:use-module (ice-9 match)
   #:use-module (ice-9 popen)
   #:use-module (ice-9 rdelim)
+  #:use-module (ice-9 vlist)
   #:use-module (srfi srfi-1)
   #:export (git-reference
             git-reference?
@@ -125,45 +127,84 @@ HASH-ALGO (a symbol).  Use NAME as the file name, or a 
generic name if #f."
   "Return the file-name for packages using git-download."
   (string-append name "-" version "-checkout"))
 
+
+;;;
+;;; 'git-predicate'.
+;;;
+
+(define (files->directory-tree files)
+  "Return a tree of vhashes representing the directory listed in FILES, a list
+like '(\"a/b\" \"b/c/d\")."
+  (fold (lambda (file result)
+          (let loop ((file (string-split file #\/))
+                     (result result))
+            (match file
+              ((_)
+               result)
+              ((directory children ...)
+               (match (vhash-assoc directory result)
+                 (#f
+                  (vhash-cons directory (loop children vlist-null)
+                              result))
+                 ((_ . previous)
+                  ;; XXX: 'vhash-delete' is O(n).
+                  (vhash-cons directory (loop children previous)
+                              (vhash-delete directory result)))))
+              (()
+               result))))
+        vlist-null
+        files))
+
+(define (directory-in-tree? tree directory)
+  "Return true if DIRECTORY, a string like \"a/b\", denotes a directory listed
+in TREE."
+  (let loop ((directory (string-split directory #\/))
+             (tree       tree))
+    (match directory
+      (()
+       #t)
+      ((head . tail)
+       (match (vhash-assoc head tree)
+         ((_ . sub-tree) (loop tail sub-tree))
+         (#f #f))))))
+
 (define (git-predicate directory)
   "Return a predicate that returns true if a file is part of the Git checkout
 living at DIRECTORY.  Upon Git failure, return #f instead of a predicate.
 
 The returned predicate takes two arguments FILE and STAT where FILE is an
 absolute file name and STAT is the result of 'lstat'."
-  (define (parent-directory? thing directory)
-    ;; Return #t if DIRECTORY is the parent of THING.
-    (or (string-suffix? thing directory)
-        (and (string-index thing #\/)
-             (parent-directory? (dirname thing) directory))))
-
-  (let* ((pipe        (with-directory-excursion directory
-                        (open-pipe* OPEN_READ "git" "ls-files")))
-         (files       (let loop ((lines '()))
-                        (match (read-line pipe)
-                          ((? eof-object?)
-                           (reverse lines))
-                          (line
-                           (loop (cons line lines))))))
-         (inodes      (map (lambda (file)
-                             (let ((stat (lstat
-                                          (string-append directory "/" file))))
-                               (cons (stat:dev stat) (stat:ino stat))))
-                           files))
-         (status      (close-pipe pipe)))
+  (let* ((pipe           (with-directory-excursion directory
+                           (open-pipe* OPEN_READ "git" "ls-files")))
+         (files          (let loop ((lines '()))
+                           (match (read-line pipe)
+                             ((? eof-object?)
+                              (reverse lines))
+                             (line
+                              (loop (cons line lines))))))
+         (directory-tree (files->directory-tree files))
+         (inodes         (fold (lambda (file result)
+                                 (let ((stat
+                                        (lstat (string-append directory "/"
+                                                              file))))
+                                   (vhash-consv (stat:ino stat) (stat:dev stat)
+                                                result)))
+                               vlist-null
+                               files))
+         (prefix-length  (+ 1 (string-length (canonicalize-path directory))))
+         (status         (close-pipe pipe)))
     (and (zero? status)
          (lambda (file stat)
            (match (stat:type stat)
              ('directory
-              ;; 'git ls-files' does not list directories, only regular files,
-              ;; so we need this special trick.
-              (any (lambda (f) (parent-directory? f file))
-                   files))
+              (directory-in-tree? directory-tree
+                                  (string-drop file prefix-length)))
              ((or 'regular 'symlink)
               ;; Comparing file names is always tricky business so we rely on
               ;; inode numbers instead
-              (member (cons (stat:dev stat) (stat:ino stat))
-                      inodes))
+              (match (vhash-assv (stat:ino stat) inodes)
+                ((_ . dev) (= dev (stat:dev stat)))
+                (#f        #f)))
              (_
               #f))))))
 



reply via email to

[Prev in Thread] Current Thread [Next in Thread]