(use-modules (gnu packages) (guix packages) (ice-9 match) (ice-9 time) (srfi srfi-1) (srfi srfi-26)) ;;; Implementation of sets using hash tables (define make-set make-hash-table) (define set-element-of? hashq-ref) (define set-add! (cut hashq-set! <> <> #t)) (define (set-fold proc init set) (hash-fold (lambda (element _ result) (proc element result)) init set)) (define (set-intersection set1 set2) "Return the intersection of SET1 with SET2." (let ((result (make-set))) (hash-for-each (lambda (element _) (when (set-element-of? set2 element) (set-add! result element))) set1) result)) (define (trigrams str) "Return all trigrams in STR." (if (< (string-length str) 3) '() (map (lambda (start) (substring str start (+ start 3))) (iota (- (string-length str) 2))))) ;; Inverted index (define index (make-hash-table)) (define (build-index!) "Return an inverted index of all trigrams in the descriptions of all packages." (fold-packages (lambda (package _) (for-each (lambda (trigram) (let ((set (hash-ref index trigram (make-set)))) (set-add! set package) (hash-set! index trigram set))) (trigrams (package-description package)))) #f) index) (define (match-package-fold-proc query package result) (if (string-contains (package-description package) query) (cons package result) result)) (define (brute-force-retrieve query) "Return names of all packages whose descriptions contain the string QUERY. Search brute force by folding over all packages." (fold-packages (cut match-package-fold-proc query <> <>) '())) (define (inverted-index-retrieve query) "Return names of all packages whose descriptions contain the string QUERY. Search using the pre-built inverted index." (match (trigrams query) ((first-trigram . remaining-trigrams) (set-fold (cut match-package-fold-proc query <> <>) '() (fold (lambda (trigram result) (set-intersection result (hash-ref index trigram (make-set)))) (hash-ref index first-trigram (make-set)) remaining-trigrams))) (() (brute-force-retrieve query)))) (define (time-us thunk) (define pair->sec (match-lambda ((sec . usec) (+ sec (/ usec 1e6))))) (let ((start (gettimeofday))) (thunk) (let ((stop (gettimeofday))) (- (pair->sec stop) (pair->sec start))))) (format #t "Built index in ~a seconds Brute force search took ~a seconds Inverted index search took ~a seconds " (time-us build-index!) (time-us (cut brute-force-retrieve "strategy")) (time-us (cut inverted-index-retrieve "strategy")))