guix-patches
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[bug#48437] [PATCH v2] lint: archival: Lookup content in Disarchive database.


From: Ludovic Courtès
Subject: [bug#48437] [PATCH v2] lint: archival: Lookup content in Disarchive database.
Date: Fri, 21 May 2021 12:27:03 +0200

* guix/lint.scm (lookup-disarchive-spec): New procedure.
(check-archival): When 'lookup-content' returns #f, call
'lookup-disarchive-spec'.  Call 'lookup-directory' on the result of
'lookup-disarchive-spec'.
* guix/download.scm (%disarchive-mirrors): Make public.
* tests/lint.scm ("archival: missing content"): Set
'%disarchive-mirrors'.
("archival: content unavailable but disarchive available"): New test.
---
 guix/download.scm |  1 +
 guix/lint.scm     | 62 ++++++++++++++++++++++++++++++++++++++++++++---
 tests/lint.scm    | 34 +++++++++++++++++++++++---
 3 files changed, 89 insertions(+), 8 deletions(-)

Hi!

This new version checks that the SWH IDs that appear in a Disarchive
entry are indeed available at archive.softwareheritage.org.

It also adds a test for that.

Ludo'.

diff --git a/guix/download.scm b/guix/download.scm
index 72094e7318..b6eb97e6fa 100644
--- a/guix/download.scm
+++ b/guix/download.scm
@@ -35,6 +35,7 @@
   #:use-module (srfi srfi-1)
   #:use-module (srfi srfi-26)
   #:export (%mirrors
+            %disarchive-mirrors
             (url-fetch* . url-fetch)
             url-fetch/executable
             url-fetch/tarbomb
diff --git a/guix/lint.scm b/guix/lint.scm
index 1bebfe03d3..a2d6418b85 100644
--- a/guix/lint.scm
+++ b/guix/lint.scm
@@ -30,6 +30,7 @@
 
 (define-module (guix lint)
   #:use-module (guix store)
+  #:autoload   (guix base16) (bytevector->base16-string)
   #:use-module (guix base32)
   #:use-module (guix diagnostics)
   #:use-module (guix download)
@@ -1227,6 +1228,43 @@ upstream releases")
                             #:field 'source)))))))
 
 
+(define (lookup-disarchive-spec hash)
+  "If Disarchive mirrors have a spec for HASH, return the list of SWH
+directory identifiers the spec refers to.  Otherwise return #f."
+  (define (extract-swh-id spec)
+    ;; Return the list of SWH directory identifiers SPEC refers to, where SPEC
+    ;; is a Disarchive sexp.  Instead of attempting to parse it, traverse it
+    ;; in a pretty unintelligent fashion.
+    (let loop ((sexp spec)
+               (ids '()))
+      (match sexp
+        ((? string? str)
+         (let ((prefix "swh:1:dir:"))
+           (if (string-prefix? prefix str)
+               (cons (string-drop str (string-length prefix)) ids)
+               ids)))
+        ((head tail ...)
+         (loop tail (loop head ids)))
+        (_ ids))))
+
+  (any (lambda (mirror)
+         (with-networking-fail-safe
+          (format #f (G_ "failed to access Disarchive database at ~a")
+                  mirror)
+          #f
+          (guard (c ((http-get-error? c) #f))
+            (let* ((url (string-append mirror
+                                       (symbol->string
+                                        (content-hash-algorithm hash))
+                                       "/"
+                                       (bytevector->base16-string
+                                        (content-hash-value hash))))
+                   (port (http-fetch (string->uri url) #:text? #t))
+                   (spec (read port)))
+              (close-port port)
+              (extract-swh-id spec)))))
+       %disarchive-mirrors))
+
 (define (check-archival package)
   "Check whether PACKAGE's source code is archived on Software Heritage.  If
 it's not, and if its source code is a VCS snapshot, then send a \"save\"
@@ -1302,10 +1340,26 @@ try again later")
                                         (symbol->string
                                          (content-hash-algorithm hash)))
                    (#f
-                    (list (make-warning package
-                                        (G_ "source not archived on Software \
-Heritage")
-                                        #:field 'source)))
+                    ;; If SWH doesn't have HASH as is, it may be because it's
+                    ;; a hand-crafted tarball.  In that case, check whether
+                    ;; the Disarchive database has an entry for that tarball.
+                    (match (lookup-disarchive-spec hash)
+                      (#f
+                       (list (make-warning package
+                                           (G_ "source not archived on Software \
+Heritage and missing from the Disarchive database")
+                                           #:field 'source)))
+                      (directory-ids
+                       (match (find (lambda (id)
+                                      (not (lookup-directory id)))
+                                    directory-ids)
+                         (#f '())
+                         (id
+                          (list (make-warning package
+                                              (G_ "
+Disarchive entry refers to non-existent SWH directory '~a'")
+                                              (list id)
+                                              #:field 'source)))))))
                    ((? content?)
                     '())))
                '()))))
diff --git a/tests/lint.scm b/tests/lint.scm
index a2c8665142..d54fafc1d2 100644
--- a/tests/lint.scm
+++ b/tests/lint.scm
@@ -1,7 +1,7 @@
 ;;; GNU Guix --- Functional package management for GNU
 ;;; Copyright © 2012, 2013 Cyril Roelandt <tipecaml@gmail.com>
 ;;; Copyright © 2014, 2015, 2016 Eric Bavier <bavier@member.fsf.org>
-;;; Copyright © 2014, 2015, 2016, 2017, 2018, 2019, 2020 Ludovic Courtès <ludo@gnu.org>
+;;; Copyright © 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021 Ludovic Courtès <ludo@gnu.org>
 ;;; Copyright © 2015, 2016 Mathieu Lirzin <mthl@gnu.org>
 ;;; Copyright © 2016 Hartmut Goebel <h.goebel@crazy-compilers.com>
 ;;; Copyright © 2017 Alex Kost <alezost@gmail.com>
@@ -1008,10 +1008,13 @@
                      (method url-fetch)
                      (uri "http://example.org/foo.tgz")
                      (sha256 (make-bytevector 32))))
-         (warnings (with-http-server '((404 "Not archived."))
+         (warnings (with-http-server '((404 "Not archived.")
+                                       (404 "Not in Disarchive database."))
                      (parameterize ((%swh-base-url (%local-url)))
-                       (check-archival (dummy-package "x"
-                                                      (source origin)))))))
+                       (mock ((guix download) %disarchive-mirrors
+                              (list (%local-url)))
+                             (check-archival (dummy-package "x"
+                                                            (source origin))))))))
     (warning-contains? "not archived" warnings)))
 
 (test-equal "archival: content available"
@@ -1027,6 +1030,29 @@
       (parameterize ((%swh-base-url (%local-url)))
         (check-archival (dummy-package "x" (source origin)))))))
 
+(test-equal "archival: content unavailable but disarchive available"
+  '()
+  (let* ((origin   (origin
+                     (method url-fetch)
+                     (uri "http://example.org/foo.tgz")
+                     (sha256 (make-bytevector 32))))
+         (disarchive (object->string
+                      '(disarchive (version 0)
+                                   ...
+                                   "swh:1:dir:aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")))
+         ;; https://archive.softwareheritage.org/api/1/directory/
+         (directory "[ { \"checksums\": {},
+                         \"dir_id\": 
\"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\",
+                         \"type\": \"file\",
+                         \"name\": \"README\"
+                         \"length\": 42 } ]"))
+    (with-http-server `((404 "")                  ;lookup-content
+                        (200 ,disarchive)         ;Disarchive database lookup
+                        (200 ,directory))         ;lookup-directory
+      (mock ((guix download) %disarchive-mirrors (list (%local-url)))
+            (parameterize ((%swh-base-url (%local-url)))
+              (check-archival (dummy-package "x" (source origin))))))))
+
 (test-assert "archival: missing revision"
   (let* ((origin   (origin
                      (method git-fetch)
-- 
2.31.1






reply via email to

[Prev in Thread] Current Thread [Next in Thread]