offload: Gracefully handle 'guix repl' protocol errors.

Fixes <https://issues.guix.gnu.org/59447>.
Reported by Mathieu Othacehe <othacehe@gnu.org>.

Previously, if a machine had a buggy 'guix repl', 'guix offload' would
crash with a backtrace instead of just ignoring the machine.

* guix/scripts/offload.scm (remote-inferior*): New procedure.
(check-machine-availability)[if-true]: New procedure.
Use 'remote-inferior*' and 'if-true'.
(check-machine-status): Use 'remote-inferior*'.
This commit is contained in:
Ludovic Courtès 2022-11-22 07:17:17 +01:00
parent 60bea07593
commit b2b9571935
No known key found for this signature in database
GPG key ID: 090B11993D9AEBB5

View file

@ -25,7 +25,7 @@ (define-module (guix scripts offload)
#:autoload (ssh auth) (userauth-public-key!) #:autoload (ssh auth) (userauth-public-key!)
#:autoload (ssh session) (make-session #:autoload (ssh session) (make-session
connect! get-error connect! get-error
disconnect! session-set!) disconnect! session-set! session-get)
#:autoload (ssh version) (zlib-support?) #:autoload (ssh version) (zlib-support?)
#:use-module (guix config) #:use-module (guix config)
#:use-module (guix records) #:use-module (guix records)
@ -34,7 +34,8 @@ (define-module (guix scripts offload)
send-files retrieve-files retrieve-files* send-files retrieve-files retrieve-files*
remote-inferior report-guile-error) remote-inferior report-guile-error)
#:use-module (guix store) #:use-module (guix store)
#:autoload (guix inferior) (inferior-eval close-inferior inferior?) #:autoload (guix inferior) (inferior-eval close-inferior
inferior? inferior-protocol-error?)
#:autoload (guix derivations) (read-derivation-from-file #:autoload (guix derivations) (read-derivation-from-file
derivation-file-name derivation-file-name
build-derivations) build-derivations)
@ -473,6 +474,15 @@ (define vec (list->vector lst))
(vector-set! vec j (vector-ref vec (- i 1))) (vector-set! vec j (vector-ref vec (- i 1)))
(loop (cons val result) (- i 1)))))))) (loop (cons val result) (- i 1))))))))
(define (remote-inferior* session)
"Like 'remote-inferior', but upon error return #f."
(or (guard (c ((inferior-protocol-error? c) #f))
(remote-inferior session))
(begin
(warning (G_ "failed to run 'guix repl' on machine '~a'~%")
(session-get session 'host))
#f)))
(define (choose-build-machine machines) (define (choose-build-machine machines)
"Return two values: the best machine among MACHINES and its build "Return two values: the best machine among MACHINES and its build
slot (which must later be released with 'release-build-slot'), or #f and #f." slot (which must later be released with 'release-build-slot'), or #f and #f."
@ -511,7 +521,7 @@ (define (machine-faster? m1 m2)
;; too costly to call it once for every machine. ;; too costly to call it once for every machine.
(let* ((session (false-if-exception (open-ssh-session best (let* ((session (false-if-exception (open-ssh-session best
%short-timeout))) %short-timeout)))
(node (and session (remote-inferior session))) (node (and session (remote-inferior* session)))
(load (and node (node-load node))) (load (and node (node-load node)))
(threshold (build-machine-overload-threshold best)) (threshold (build-machine-overload-threshold best))
(space (and node (node-free-disk-space node)))) (space (and node (node-free-disk-space node))))
@ -708,6 +718,11 @@ (define (build-machine=? m1 m2)
(and (string=? (build-machine-name m1) (build-machine-name m2)) (and (string=? (build-machine-name m1) (build-machine-name m2))
(= (build-machine-port m1) (build-machine-port m2)))) (= (build-machine-port m1) (build-machine-port m2))))
(define (if-true proc)
(lambda args
(when (every ->bool args)
(apply proc args))))
;; A given build machine may appear several times (e.g., once for ;; A given build machine may appear several times (e.g., once for
;; "x86_64-linux" and a second time for "i686-linux"); test them only once. ;; "x86_64-linux" and a second time for "i686-linux"); test them only once.
(let ((machines (filter pred (let ((machines (filter pred
@ -718,12 +733,12 @@ (define (build-machine=? m1 m2)
(let* ((names (map build-machine-name machines)) (let* ((names (map build-machine-name machines))
(sockets (map build-machine-daemon-socket machines)) (sockets (map build-machine-daemon-socket machines))
(sessions (map (cut open-ssh-session <> %short-timeout) machines)) (sessions (map (cut open-ssh-session <> %short-timeout) machines))
(nodes (map remote-inferior sessions))) (nodes (map remote-inferior* sessions)))
(for-each assert-node-has-guix nodes names) (for-each (if-true assert-node-has-guix) nodes names)
(for-each assert-node-repl nodes names) (for-each (if-true assert-node-repl) nodes names)
(for-each assert-node-can-import sessions nodes names sockets) (for-each (if-true assert-node-can-import) sessions nodes names sockets)
(for-each assert-node-can-export sessions nodes names sockets) (for-each (if-true assert-node-can-export) sessions nodes names sockets)
(for-each close-inferior nodes) (for-each (if-true close-inferior) nodes)
(for-each disconnect! sessions)))) (for-each disconnect! sessions))))
(define (check-machine-status machine-file pred) (define (check-machine-status machine-file pred)
@ -743,10 +758,9 @@ (define (build-machine=? m1 m2)
(define session (define session
(open-ssh-session machine %short-timeout)) (open-ssh-session machine %short-timeout))
(match (remote-inferior session) (match (remote-inferior* session)
(#f (#f
(warning (G_ "failed to run 'guix repl' on machine '~a'~%") #f)
(build-machine-name machine)))
((? inferior? inferior) ((? inferior? inferior)
(let ((now (car (gettimeofday)))) (let ((now (car (gettimeofday))))
(match (inferior-eval '(list (uname) (match (inferior-eval '(list (uname)