[Top][All Lists]

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

(gumm personal ...) spider 0.90

From: Thien-Thi Nguyen
Subject: (gumm personal ...) spider 0.90
Date: Fri, 14 Dec 2001 00:45:28 -0800


see below for a sneak peek at the spider that collects info for (gumm
personal ...).  for some hint on what this means, see the protocol:

note that the protocol is not yet linked from the index in that
directory; that will happen at the Right Moment and Not Sooner...  on
the other hand, the registration cgi is already in place for the
adventurous -- that code follows the spider.

feedback requested on the protocol, the spider, the cgi, and/or gumm!
(please remove guile-sources from non-source postings.)

happy hacking,

# -*- scheme -*-  time-stamp: <2001-12-14 00:19:39 ttn>
exec guile -s $0 "$@"
;;; (gumm personal ...) spider version 0.90

;;; Copyright (C) 2001 Thien-Thi Nguyen
;;; This program is provided under the terms of the GNU GPL, version 2.
;;; See for details.

(debug-enable 'debug 'backtrace)

(define registration-queue-url "http://giblet/bin/gumm.cgi?get=1";)

(define *db*    "ttn")
(define *table* "gumm_personal_spider_food")
(define *defs*  '((url          text            "UNIQUE")
                  (lastcheck    timestamp)
                  (unr_count    int4)
                  (config       text)))

;;;;;; zonkme-when-installed: ttn-pers-scheme-0.28
(set! %load-path (cons "/home/ttn/build/ttn-pers-scheme" %load-path))

(use-modules ((ttn pgtable) :select (pgtable-manager where-clausifier)))

(define (where . args)
  (where-clausifier (apply string-append args)))

(define (M)
  (or (object-property M 'pgtable-manager)
      (let ((m (pgtable-manager *db* *table* *defs*)))
        (set-object-property! M 'pgtable-manager m)

(define (Msel . args) (apply ((M) 'select)     args))
(define (Mupd . args) (apply ((M) 'update-col) args))

(define (Mins alist) (((M) 'insert-alist) alist))

(define (Mdel . args)
  (((M) 'delete-rows) (apply where args)))

(use-modules ((ice-9 common-list) :select (every)))

(define *required-keys* '(trigger version release author tarball))

(define (url->raw-config url)
  (let ((raw-config (false-if-exception
                     (with-input-from-string (www:get url)
                       (lambda () (read))))))
    (and (list? raw-config)
         (every pair? raw-config)
         (every (lambda (key)
                  (assq key raw-config))

(define (w/o-trigger&->string trigger config)
  (object->string (delete (cons 'trigger trigger) config)))

(define (url<-config config)
  (assq-ref config 'trigger))

(define (elaborated-config . selection)
  (map (lambda (top-level)              ; promote config
         (let ((config (assq-ref top-level 'config)))
           (append (delete (assq 'config top-level) top-level)
                   (with-input-from-string config
                     (lambda () (read))))))
       (((M) 'tuples-result->alists) (apply Msel selection))))

(define (elaborated-config-1-url url)
  (car (elaborated-config "*" (where "url = '" url "'"))))

(use-modules ((ttn echo) :select (echo)))

;;; list

(define (list-one alist)
  (echo '------------)
  (for-each (lambda (pair)
              (format #t "~A -- ~A~%" (car pair) (cdr pair)))
            (let ((len (lambda (x)
                          (symbol->string (car x))))))
              (sort alist                                       ;;; cheesy
                    (lambda (a b)
                      (< (len a) (len b)))))))

(define (list!)
  (for-each list-one (elaborated-config "*")))

;;; add

(use-modules ((www main) :select (www:get)))

(define (read/clear-registration-queue!)
  (with-input-from-string (www:get registration-queue-url)
    (lambda ()
      (let loop ((ent (read)) (acc '()))
        (if (eof-object? ent)
            (reverse acc)
            (loop (read) (cons (cadr ent) acc)))))))    ; ugh

(use-modules ((ttn echo) :select (echo echo-n)))

(define (add-one! url config)
  (Mins `((url . ,url)
          (unr_count . 0)
          (lastcheck . ,(current-time))
          (config . ,(w/o-trigger&->string url config)))))

(use-modules (database postgres))

(define (add-one-maybe! url)
  (echo-n "adding:" url "... ")
  (let ((rv (cond ((url->raw-config url)
                   => (lambda (raw)
                        ;; integrated redundancy elimination / final check
                        (and (equal? url (url<-config raw))
                             (pg-result-status (add-one! url raw)))))
                  (else #f))))
    (echo (or rv "error"))
    (eq? 'PGRES_COMMAND_OK rv)))

(use-modules ((ice-9 common-list) :select (count-if))
             ((ttn echo)          :select (echo)))

(define (add!)
  (let* ((candidates (read/clear-registration-queue!))
         (new (count-if add-one-maybe! candidates)))
    (echo "new:" (length candidates) "candidates" new "added")))

;;; collect

(use-modules ((ttn echo) :select (echo echow)))

(define (collect-current-url url)       ; current means already in db
  (let ((update! (lambda (url cols vals)
                   (Mupd cols vals (string-append "url = '" url "'")))))
    (cond ((url->raw-config url)
           => (lambda (raw)
                ;; on trigger change, discard old trigger
                (let ((nurl (url<-config raw)))
                  (or (equal? url nurl)
                        (update! url
                                 '(url unr_count)
                                 `(,nurl 0))
                        (set! url nurl))))
                (update! url
                         '(lastcheck config)
                         `(,(current-time) ,(w/o-trigger&->string url raw)))
           (update! url '(unr_count) (list (sql-pre "unr_count + 1")))

(use-modules ((ttn echo) :select (echo)))

(define (note-changes keys before-lookup after-lookup)
  (echo (after-lookup 'url))
  (for-each (lambda (key)
              (let ((b (before-lookup key))
                    (a (after-lookup  key)))
                (or (equal? b a)
                    (echo "changed:" key "\n" b "\n" a))))

(use-modules ((ice-9 common-list) :select (pick union)))

(define (collect!)
  (let* ((all (elaborated-config "*"))
         (make-lookup (lambda (config)
                        (lambda (key)
                          (assq-ref config key))))
         (need-to-look? (lambda (config)
                          (let* ((lookup (make-lookup config))
                                 (last (lookup 'lastcheck))
                                 (freq (or (lookup 'spider-frequency) 1))
                                 (next (+ last (* 60 60 freq))))
                            (<= next (current-time)))))
         (needy (pick need-to-look? all))
         (unreachable 0))
    (for-each (lambda (needy-one)
                (let ((lookup (make-lookup needy-one)))
                  (cond ((collect-current-url (lookup 'url))
                         => (lambda (url)               ;;; may be different
                              (let ((after (elaborated-config-1-url url)))
                                (note-changes (union (map car needy-one)
                                                     (map car after))
                                              (make-lookup after)))))
                         (let ((toomuch (or (lookup 'unreachable-delist) 24)))
                           (and (<= toomuch (1+ (lookup 'unr_count)))
                                (echo "toomuch:" toomuch url "..."
                                      (Mdel "url = '" url "'"))))
                         (set! unreachable (1+ unreachable))))))
    (echo "collect:"
          (length all) "total"
          (length needy) "attempts"
          unreachable "unreachable")))

;;; dispatch

(define (do-command command)
  (case command
    ((list)    (list!))
    ((add)     (add!))
    ((collect) (collect!))
    (else      (error "bad command:" command))))

(define (command)
  (string->symbol (cadr (command-line))))

(do-command (command))

;;; .spider ends here
# -*- scheme -*- time-stamp: <2001-12-06 21:40:39 ttn>
exec guile -s $0 "$@"
;;; gumm.cgi version 0.90

;;; Copyright (C) 2001 Thien-Thi Nguyen
;;; This program is provided under the terms of the GNU GPL, version 2.
;;; See for details.

(define queue "var/gumm/registration-queue")

(use-modules (www cgi) (ttn echo))

(echo "Content-type: text/plain")


(cond ((cgi:value "get")
       (system (string-append "cat " queue))
       (with-output-to-file queue
         (lambda () (echo ";;; last get/clear:"
                          (strftime "%c" (gmtime (current-time))))))
       (exit #t)))

(define url (cgi:value "register"))

(cond ((not url)
       (echo "sorry")
       (exit #f)))

(echo-n "Registration in progress... ")
(define start (current-time))

(use-modules (www url))

(define parsed (url:parse url))

(define (bail-if v)
  (cond (v (echo "sorry, invalid url")
           (exit #f))))

;;(echow parsed)
(bail-if (not (eq? 'http (url:scheme parsed))))

(define host (url:host parsed))
(define port (url:port parsed))
(define path (url:path parsed))

;;(echo 'port port 'host host 'path path)
(bail-if (or (not host)
             (not path)
             (string-index path #\space)
             (string-index path #\newline)))

(use-modules (www http))

(define cnxn (false-if-exception (http:open host port)))

;;(echo 'cnxn cnxn)
(bail-if (not cnxn))

(let ((p (open-file queue "a")))
  (with-output-to-port p
    (lambda ()
      (echow (list (current-time) url)))))

(let ((finish (current-time)))
  (echo "done," (- finish start) "seconds.")
(echo "OK, looks like:")
(echo " " url)
(echo "has been added to the GUMM registration queue.")
(echo "Wait a couple hours to see if the spider is happy.")

(exit #t)

;;; gumm.cgi ends here

reply via email to

[Prev in Thread] Current Thread [Next in Thread]