guix-patches
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[bug#39258] Faster guix search using an sqlite cache


From: Arun Isaac
Subject: [bug#39258] Faster guix search using an sqlite cache
Date: Fri, 24 Jan 2020 01:21:57 +0530

Hi,

As discussed on guix-devel at
https://lists.gnu.org/archive/html/guix-devel/2020-01/msg00310.html , I
am working on an sqlite cache to improve guix search performance. I have
attached a highly incomplete WIP patch. The patch attempts to
reimplement the package-cache-file hook in guix/channels.scm using a
sqlite database. To this end, it rewrites most of the
generate-package-cache and cache-lookup functions in gnu/packages.scm. I
am yet to hook this up to guix search.

At the moment, I am having some difficulty populating the sqlite
database. generate-package-cache populates the database correctly when
invoked from a normal guile REPL using geiser, but fails to do so when
run by the guix daemon during guix pull.

I ran guix pull using

$ ./pre-inst-env guix pull --url=$PWD --branch=search -p /tmp/test

where search is the branch I am working on.

Running

$ ls /tmp/test/lib/guix -lh

shows

total 2.1M
-r--r--r-- 2 root root 2.1M ஜன.   1  1970 package-cache.sqlite
-r--r--r-- 2 root root  26K ஜன.   1  1970 package-cache.sqlite-journal

On examining package-cache.sqlite, I find that no records have been
written. And, there is a lingering journal file that shouldn't be
there. For some reason, populating the sqlite database does not work
with guix pull. sqlite probably crashes and leaves the journal file.

If I try to populate the database with each package record being
inserted in its own transaction, at least some of the insertions
work. But the journal file still lingers. My unverified guess is that
everything except the last transaction was successful.

Any ideas what's going on?

Also, inserting each package in its own transaction is ridiculously slow
and so that is out of the question. See https://www.sqlite.org/faq.html#q19

From d1305351a90a84eb75e4769284d5e06927eade3e Mon Sep 17 00:00:00 2001
From: Arun Isaac <address@hidden>
Date: Tue, 21 Jan 2020 20:45:43 +0530
Subject: [PATCH] fast search

---
 build-aux/build-self.scm |   5 +
 gnu/packages.scm         | 207 +++++++++++++++++++++++----------------
 2 files changed, 128 insertions(+), 84 deletions(-)

diff --git a/build-aux/build-self.scm b/build-aux/build-self.scm
index fc13032b73..c123ad3b11 100644
--- a/build-aux/build-self.scm
+++ b/build-aux/build-self.scm
@@ -264,6 +264,9 @@ interface (FFI) of Guile.")
   (define fake-git
     (scheme-file "git.scm" #~(define-module (git))))
 
+  (define fake-sqlite3
+    (scheme-file "sqlite3.scm" #~(define-module (sqlite3))))
+
   (with-imported-modules `(((guix config)
                             => ,(make-config.scm))
 
@@ -278,6 +281,8 @@ interface (FFI) of Guile.")
                            ;; (git) to placate it.
                            ((git) => ,fake-git)
 
+                           ((sqlite3) => ,fake-sqlite3)
+
                            ,@(source-module-closure `((guix store)
                                                       (guix self)
                                                       (guix derivations)
diff --git a/gnu/packages.scm b/gnu/packages.scm
index d22c992bb1..4e2c52e62d 100644
--- a/gnu/packages.scm
+++ b/gnu/packages.scm
@@ -43,6 +43,7 @@
   #:use-module (srfi srfi-34)
   #:use-module (srfi srfi-35)
   #:use-module (srfi srfi-39)
+  #:use-module (sqlite3)
   #:export (search-patch
             search-patches
             search-auxiliary-file
@@ -204,10 +205,8 @@ PROC is called along these lines:
 PROC can use #:allow-other-keys to ignore the bits it's not interested in.
 When a package cache is available, this procedure does not actually load any
 package module."
-  (define cache
-    (load-package-cache (current-profile)))
-
-  (if (and cache (cache-is-authoritative?))
+  (if (and (cache-is-authoritative?)
+           (current-profile))
       (vhash-fold (lambda (name vector result)
                     (match vector
                       (#(name version module symbol outputs
@@ -220,7 +219,7 @@ package module."
                              #:supported? supported?
                              #:deprecated? deprecated?))))
                   init
-                  cache)
+                  (cache-lookup (current-profile)))
       (fold-packages (lambda (package result)
                        (proc (package-name package)
                              (package-version package)
@@ -252,31 +251,7 @@ is guaranteed to never traverse the same package twice."
 
 (define %package-cache-file
   ;; Location of the package cache.
-  "/lib/guix/package.cache")
-
-(define load-package-cache
-  (mlambda (profile)
-    "Attempt to load the package cache.  On success return a vhash keyed by
-package names.  Return #f on failure."
-    (match profile
-      (#f #f)
-      (profile
-       (catch 'system-error
-         (lambda ()
-           (define lst
-             (load-compiled (string-append profile %package-cache-file)))
-           (fold (lambda (item vhash)
-                   (match item
-                     (#(name version module symbol outputs
-                             supported? deprecated?
-                             file line column)
-                      (vhash-cons name item vhash))))
-                 vlist-null
-                 lst))
-         (lambda args
-           (if (= ENOENT (system-error-errno args))
-               #f
-               (apply throw args))))))))
+  "/lib/guix/package-cache.sqlite")
 
 (define find-packages-by-name/direct              ;bypass the cache
   (let ((packages (delay
@@ -297,25 +272,57 @@ decreasing version order."
                     matching)
             matching)))))
 
-(define (cache-lookup cache name)
+(define* (cache-lookup profile #:optional name)
   "Lookup package NAME in CACHE.  Return a list sorted in increasing version
 order."
   (define (package-version<? v1 v2)
     (version>? (vector-ref v2 1) (vector-ref v1 1)))
 
-  (sort (vhash-fold* cons '() name cache)
-        package-version<?))
+  (define (int->boolean n)
+    (case n
+      ((0) #f)
+      ((1) #t)))
+
+  (define (string->list str)
+    (call-with-input-string str read))
+
+  (define select-statement
+    (string-append
+     "SELECT name, version, module, symbol, outputs, supported, superseded, 
locationFile, locationLine, locationColumn from packages"
+     (if name " WHERE name = :name" "")))
+
+  (define cache-file
+    (string-append profile %package-cache-file))
+
+  (let* ((db (sqlite-open cache-file SQLITE_OPEN_READONLY))
+         (statement (sqlite-prepare db select-statement)))
+    (when name
+      (sqlite-bind-arguments statement #:name name))
+    (let ((result (sqlite-fold (lambda (v result)
+                                 (match v
+                                   (#(name version module symbol outputs 
supported superseded file line column)
+                                    (cons
+                                     (vector name
+                                             version
+                                             (string->list module)
+                                             (string->symbol symbol)
+                                             (string->list outputs)
+                                             (int->boolean supported)
+                                             (int->boolean superseded)
+                                             (list file line column))
+                                     result))))
+                               '() statement)))
+      (sqlite-finalize statement)
+      (sqlite-close db)
+      (sort result package-version<?))))
 
 (define* (find-packages-by-name name #:optional version)
   "Return the list of packages with the given NAME.  If VERSION is not #f,
 then only return packages whose version is prefixed by VERSION, sorted in
 decreasing version order."
-  (define cache
-    (load-package-cache (current-profile)))
-
-  (if (and (cache-is-authoritative?) cache)
-      (match (cache-lookup cache name)
-        (#f #f)
+  (if (and (cache-is-authoritative?)
+           (current-profile))
+      (match (cache-lookup (current-profile) name)
         ((#(_ versions modules symbols _ _ _ _ _ _) ...)
          (fold (lambda (version* module symbol result)
                  (if (or (not version)
@@ -331,12 +338,9 @@ decreasing version order."
 (define* (find-package-locations name #:optional version)
   "Return a list of version/location pairs corresponding to each package
 matching NAME and VERSION."
-  (define cache
-    (load-package-cache (current-profile)))
-
-  (if (and cache (cache-is-authoritative?))
-      (match (cache-lookup cache name)
-        (#f '())
+  (if (and (cache-is-authoritative?)
+           (current-profile))
+      (match (cache-lookup (current-profile) name)
         ((#(name versions modules symbols outputs
                  supported? deprecated?
                  files lines columns) ...)
@@ -372,6 +376,9 @@ VERSION."
 ;; Prevent Guile 3 from inlining this procedure so we can mock it in tests.
 (set! find-best-packages-by-name find-best-packages-by-name)
 
+(define (list->string x)
+  (call-with-output-string (cut write x <>)))
+
 (define (generate-package-cache directory)
   "Generate under DIRECTORY a cache of all the available packages.
 
@@ -381,49 +388,81 @@ reducing the memory footprint."
   (define cache-file
     (string-append directory %package-cache-file))
 
-  (define (expand-cache module symbol variable result+seen)
+  (define schema
+    "CREATE TABLE packages (name text,
+version text,
+module text,
+symbol text,
+outputs text,
+supported int,
+superseded int,
+locationFile text,
+locationLine int,
+locationColumn int);
+CREATE VIRTUAL TABLE packageSearch USING fts5(name, searchText);")
+
+  (define insert-statement
+    "INSERT INTO packages(name, version, module, symbol, outputs, supported, 
superseded, locationFile, locationLine, locationColumn)
+VALUES(:name, :version, :module, :symbol, :outputs, :supported, :superseded, 
:locationfile, :locationline, :locationcolumn)")
+
+  (define insert-package-search-statement
+    "INSERT INTO packageSearch(name, searchText) VALUES(:name, :searchtext)")
+
+  (define (boolean->int x)
+    (if x 1 0))
+
+  (define (list->string x)
+    (call-with-output-string (cut write x <>)))
+
+  (define (insert-package db module symbol variable seen)
     (match (false-if-exception (variable-ref variable))
       ((? package? package)
-       (match result+seen
-         ((result . seen)
-          (if (or (vhash-assq package seen)
-                  (hidden-package? package))
-              (cons result seen)
-              (cons (cons `#(,(package-name package)
-                             ,(package-version package)
-                             ,(module-name module)
-                             ,symbol
-                             ,(package-outputs package)
-                             ,(->bool (supported-package? package))
-                             ,(->bool (package-superseded package))
-                             ,@(let ((loc (package-location package)))
-                                 (if loc
-                                     `(,(location-file loc)
-                                       ,(location-line loc)
-                                       ,(location-column loc))
-                                     '(#f #f #f))))
-                          result)
-                    (vhash-consq package #t seen))))))
-      (_
-       result+seen)))
-
-  (define exp
-    (first
-     (fold-module-public-variables* expand-cache
-                                    (cons '() vlist-null)
-                                    (all-modules (%package-module-path)
-                                                 #:warn
-                                                 warn-about-load-error))))
+       (cond
+        ((or (vhash-assq package seen)
+             (hidden-package? package))
+         seen)
+        (else
+         (let ((statement (sqlite-prepare db insert-statement)))
+           (sqlite-bind-arguments statement
+                                  #:name (package-name package)
+                                  #:version (package-version package)
+                                  #:module (list->string (module-name module))
+                                  #:symbol (symbol->string symbol)
+                                  #:outputs (list->string (package-outputs 
package))
+                                  #:supported (boolean->int 
(supported-package? package))
+                                  #:superseded (boolean->int 
(package-superseded package))
+                                  #:locationfile (cond
+                                                  ((package-location package) 
=> location-file)
+                                                  (else #f))
+                                  #:locationline (cond
+                                                  ((package-location package) 
=> location-line)
+                                                  (else #f))
+                                  #:locationcolumn (cond
+                                                    ((package-location 
package) => location-column)
+                                                    (else #f)))
+           (sqlite-fold cons '() statement)
+           (sqlite-finalize statement))
+         (let ((statement (sqlite-prepare db insert-package-search-statement)))
+           (sqlite-bind-arguments statement
+                                  #:name (package-name package)
+                                  #:searchtext (package-description package))
+           (sqlite-fold cons '() statement)
+           (sqlite-finalize statement))
+         (vhash-consq package #t seen))))
+      (_ seen)))
 
   (mkdir-p (dirname cache-file))
-  (call-with-output-file cache-file
-    (lambda (port)
-      ;; Store the cache as a '.go' file.  This makes loading fast and reduces
-      ;; heap usage since some of the static data is directly mmapped.
-      (put-bytevector port
-                      (compile `'(,@exp)
-                               #:to 'bytecode
-                               #:opts '(#:to-file? #t)))))
+  (let ((db (sqlite-open cache-file)))
+    (sqlite-exec db schema)
+    (sqlite-exec db "BEGIN")
+    (fold-module-public-variables* (cut insert-package db <> <> <> <>)
+                                   vlist-null
+                                   (all-modules (%package-module-path)
+                                                #:warn
+                                                warn-about-load-error))
+    (sqlite-exec db "COMMIT;")
+    (sqlite-close db))
+
   cache-file)
 

-- 
2.23.0

Attachment: signature.asc
Description: PGP signature


reply via email to

[Prev in Thread] Current Thread [Next in Thread]