offload: Rewrite to make direct RPCs to the remote daemon.

* guix/scripts/offload.scm (<build-machine>)[daemon-socket]: New field.
(connect-to-remote-daemon): New procedure.
(%gc-root-file, register-gc-root, remove-gc-roots, offload): Remove.
(transfer-and-offload): Rewrite using 'connect-to-remote-daemon' and
RPCs over SSH.
(store-import-channel, store-export-channel): New procedures.
(send-files, retrieve-files): Rewrite using these.
This commit is contained in:
Ludovic Courtès 2016-11-05 00:47:34 +01:00
parent e8a5db80d5
commit cf283dd92e
No known key found for this signature in database
GPG Key ID: 090B11993D9AEBB5
2 changed files with 167 additions and 192 deletions

View File

@ -921,6 +921,10 @@ Port number of SSH server on the machine.
The SSH private key file to use when connecting to the machine, in
OpenSSH format.
@item @code{daemon-socket} (default: @code{"/var/guix/daemon-socket/socket"})
File name of the Unix-domain socket @command{guix-daemon} is listening
to on that machine.
@item @code{parallel-builds} (default: @code{1})
The number of builds that may run in parallel on the machine.

View File

@ -21,6 +21,9 @@
#:use-module (ssh auth)
#:use-module (ssh session)
#:use-module (ssh channel)
#:use-module (ssh popen)
#:use-module (ssh dist)
#:use-module (ssh dist node)
#:use-module (guix config)
#:use-module (guix records)
#:use-module (guix store)
@ -71,6 +74,8 @@
(private-key build-machine-private-key ; file name
(default (user-openssh-private-key)))
(host-key build-machine-host-key) ; string
(daemon-socket build-machine-daemon-socket ; string
(default "/var/guix/daemon-socket/socket"))
(parallel-builds build-machine-parallel-builds ; number
(default 1))
(speed build-machine-speed ; inexact real
@ -197,6 +202,53 @@ instead of '~a' of type '~a'~%")
session))
(define* (connect-to-remote-daemon session
#:optional
(socket-name "/var/guix/daemon-socket/socket"))
"Connect to the remote build daemon listening on SOCKET-NAME over SESSION,
an SSH session. Return a <nix-server> object."
(define redirect
;; Code run in SESSION to redirect the remote process' stdin/stdout to the
;; daemon's socket, à la socat. The SSH protocol supports forwarding to
;; Unix-domain sockets but libssh doesn't have an API for that, hence this
;; hack.
`(begin
(use-modules (ice-9 match) (rnrs io ports))
(let ((sock (socket AF_UNIX SOCK_STREAM 0))
(stdin (current-input-port))
(stdout (current-output-port)))
(setvbuf stdin _IONBF)
(setvbuf stdout _IONBF)
(connect sock AF_UNIX ,socket-name)
(let loop ()
(match (select (list stdin sock) '() (list stdin stdout sock))
((reads writes ())
(when (memq stdin reads)
(match (get-bytevector-some stdin)
((? eof-object?)
(primitive-exit 0))
(bv
(put-bytevector sock bv))))
(when (memq sock reads)
(match (get-bytevector-some sock)
((? eof-object?)
(primitive-exit 0))
(bv
(put-bytevector stdout bv))))
(loop))
(_
(primitive-exit 1)))))))
(let ((channel
(open-remote-pipe* session OPEN_BOTH
;; Sort-of shell-quote REDIRECT.
"guile" "-c"
(object->string
(object->string redirect)))))
(open-connection #:port channel)))
(define* (remote-pipe session command
#:key (quote? #t))
"Run COMMAND (a list) on SESSION, and return an open input/output port,
@ -306,116 +358,6 @@ hook."
(set-port-revealed! port 1)
port))
(define %gc-root-file
;; File name of the temporary GC root we install.
(format #f "offload-~a-~a" (gethostname) (getpid)))
(define (register-gc-root file session)
"Mark FILE, a store item, as a garbage collector root in SESSION. Return
the exit status, zero on success."
(define script
`(begin
(use-modules (guix config))
;; Note: we can't use 'add-indirect-root' because dangling links under
;; gcroots/auto are automatically deleted by the GC. This strategy
;; doesn't have this problem, but it requires write access to that
;; directory.
(let ((root-directory (string-append %state-directory
"/gcroots/tmp")))
(catch 'system-error
(lambda ()
(mkdir root-directory))
(lambda args
(unless (= EEXIST (system-error-errno args))
(error "failed to create remote GC root directory"
root-directory (system-error-errno args)))))
(catch 'system-error
(lambda ()
(symlink ,file
(string-append root-directory "/" ,%gc-root-file)))
(lambda args
;; If FILE already exists, we can assume that either it's a stale
;; reference (which is fine), or another process is already
;; building the derivation represented by FILE (which is fine
;; too.) Thus, do nothing in that case.
(unless (= EEXIST (system-error-errno args))
(apply throw args)))))))
(let ((pipe (remote-pipe session
`("guile" "-c" ,(object->string script)))))
(read-string pipe)
(let ((status (channel-get-exit-status pipe)))
(close-port pipe)
(unless (zero? status)
;; Better be safe than sorry: if we ignore the error here, then FILE
;; may be GC'd just before we start using it.
(leave (_ "failed to register GC root for '~a' on '~a' (status: ~a)~%")
file (session-get session 'host) status)))))
(define (remove-gc-roots session)
"Remove in SESSION the GC roots previously installed with
'register-gc-root'."
(define script
`(begin
(use-modules (guix config) (ice-9 ftw)
(srfi srfi-1) (srfi srfi-26))
(let ((root-directory (string-append %state-directory
"/gcroots/tmp")))
(false-if-exception
(delete-file
(string-append root-directory "/" ,%gc-root-file)))
;; These ones were created with 'guix build -r' (there can be more
;; than one in case of multiple-output derivations.)
(let ((roots (filter (cut string-prefix? ,%gc-root-file <>)
(scandir "."))))
(for-each (lambda (file)
(false-if-exception (delete-file file)))
roots)))))
(let ((pipe (remote-pipe session
`("guile" "-c" ,(object->string script)))))
(read-string pipe)
(close-port pipe)))
(define* (offload drv session
#:key print-build-trace? (max-silent-time 3600)
build-timeout (log-port (build-log-port)))
"Perform DRV in SESSION, assuming DRV and its prerequisites are available
there, and write the build log to LOG-PORT. Return the exit status."
;; Normally DRV has already been protected from GC when it was transferred.
;; The '-r' flag below prevents the build result from being GC'd.
(let ((pipe (remote-pipe session
`("guix" "build"
"-r" ,%gc-root-file
,(format #f "--max-silent-time=~a"
max-silent-time)
,@(if build-timeout
(list (format #f "--timeout=~a"
build-timeout))
'())
,(derivation-file-name drv))
;; Since 'guix build' writes the build log to its
;; stderr, everything will go directly to LOG-PORT.
;; #:error-port log-port ;; FIXME
)))
;; Make standard error visible.
(channel-set-stream! pipe 'stderr)
(let loop ((line (read-line pipe)))
(unless (eof-object? line)
(display line log-port)
(newline log-port)
(loop (read-line pipe))))
(let loop ((status (channel-get-exit-status pipe)))
(close-port pipe)
status)))
(define* (transfer-and-offload drv machine
#:key
(inputs '())
@ -429,99 +371,128 @@ MACHINE."
(define session
(open-ssh-session machine))
(when (begin
(register-gc-root (derivation-file-name drv) session)
(send-files (cons (derivation-file-name drv) inputs)
session))
(format (current-error-port) "offloading '~a' to '~a'...~%"
(derivation-file-name drv) (build-machine-name machine))
(format (current-error-port) "@ build-remote ~a ~a~%"
(derivation-file-name drv) (build-machine-name machine))
(define store
(connect-to-remote-daemon session
(build-machine-daemon-socket machine)))
(let ((status (offload drv session
#:print-build-trace? print-build-trace?
#:max-silent-time max-silent-time
#:build-timeout build-timeout)))
(if (zero? status)
(begin
(retrieve-files outputs session)
(remove-gc-roots session)
(format (current-error-port)
"done with offloaded '~a'~%"
(derivation-file-name drv)))
(begin
(remove-gc-roots session)
(format (current-error-port)
"derivation '~a' offloaded to '~a' failed \
with exit code ~a~%"
(derivation-file-name drv)
(build-machine-name machine)
status)
(set-build-options store
#:print-build-trace print-build-trace?
#:max-silent-time max-silent-time
#:timeout build-timeout)
;; Use exit code 100 for a permanent build failure. The daemon
;; interprets other non-zero codes as transient build failures.
(primitive-exit 100))))))
;; Protect DRV from garbage collection.
(add-temp-root store (derivation-file-name drv))
(define (send-files files session)
"Send the subset of FILES that's missing to SESSION's store. Return #t on
success, #f otherwise."
(define (missing-files files)
;; Return the subset of FILES not already on SESSION. Use 'head' as a
;; hack to make sure the remote end stops reading when we're done.
(let* ((pipe (remote-pipe session
`("guix" "archive" "--missing")
#:quote? #f)))
(format pipe "~{~a~%~}" files)
(channel-send-eof pipe)
(string-tokenize (read-string pipe))))
(send-files (cons (derivation-file-name drv) inputs)
store)
(format (current-error-port) "offloading '~a' to '~a'...~%"
(derivation-file-name drv) (build-machine-name machine))
(format (current-error-port) "@ build-remote ~a ~a~%"
(derivation-file-name drv) (build-machine-name machine))
(guard (c ((nix-protocol-error? c)
(format (current-error-port)
(_ "derivation '~a' offloaded to '~a' failed: ~a~%")
(derivation-file-name drv)
(build-machine-name machine)
(nix-protocol-error-message c))
;; Use exit code 100 for a permanent build failure. The daemon
;; interprets other non-zero codes as transient build failures.
(primitive-exit 100)))
(build-derivations store (list drv)))
(retrieve-files outputs store)
(format (current-error-port) "done with offloaded '~a'~%"
(derivation-file-name drv)))
(define (store-import-channel session)
"Return an output port to which archives to be exported to SESSION's store
can be written."
;; Using the 'import-paths' RPC on a remote store would be slow because it
;; makes a round trip every time 32 KiB have been transferred. This
;; procedure instead opens a separate channel to use the remote
;; 'import-paths' procedure, which consumes all the data in a single round
;; trip.
(define import
`(begin
(use-modules (guix))
(with-store store
(setvbuf (current-input-port) _IONBF)
(import-paths store (current-input-port)))))
(open-remote-output-pipe session
(string-join
`("guile" "-c"
,(object->string
(object->string import))))))
(define (store-export-channel session files)
"Return an input port from which an export of FILES from SESSION's store can
be read."
;; Same as above: this is more efficient than calling 'export-paths' on a
;; remote store.
(define export
`(begin
(use-modules (guix))
(with-store store
(setvbuf (current-output-port) _IONBF)
(export-paths store ',files (current-output-port)))))
(open-remote-input-pipe session
(string-join
`("guile" "-c"
,(object->string
(object->string export))))))
(define (send-files files remote)
"Send the subset of FILES that's missing to REMOTE, a remote store."
(with-store store
(guard (c ((nix-protocol-error? c)
(warning (_ "failed to export files for '~a': ~s~%")
(session-get session 'host) c)
#f))
;; Compute the subset of FILES missing on SESSION, and send them in
;; topologically sorted order so that they can actually be imported.
(let* ((sorted (topologically-sorted store files))
(session (channel-get-session (nix-server-socket remote)))
(node (make-node session))
(missing (node-eval node
`(begin
(use-modules (guix)
(srfi srfi-1) (srfi srfi-26))
;; Compute the subset of FILES missing on SESSION, and send them in
;; topologically sorted order so that they can actually be imported.
(let* ((files (missing-files (topologically-sorted store files)))
(pipe (remote-pipe session
'("guix" "archive" "--import")
#:quote? #f)))
(format #t (_ "sending ~a store files to '~a'...~%")
(length files) (session-get session 'host))
(with-store store
(remove (cut valid-path? store <>)
',sorted)))))
(port (store-import-channel session)))
(format #t (_ "sending ~a store files to '~a'...~%")
(length missing) (session-get session 'host))
(export-paths store files pipe)
(channel-send-eof pipe)
(export-paths store missing port)
;; Wait for the remote process to complete.
(let ((status (channel-get-exit-status pipe)))
(close pipe)
status)))))
;; Tell the remote process that we're done. (In theory the
;; end-of-archive mark of 'export-paths' would be enough, but in
;; practice it's not.)
(channel-send-eof port)
(define (retrieve-files files session)
;; Wait for completion of the remote process.
(let ((result (zero? (channel-get-exit-status port))))
(close-port port)
result))))
(define (retrieve-files files remote)
"Retrieve FILES from SESSION's store, and import them."
(define host
(session-get session 'host))
(let* ((session (channel-get-session (nix-server-socket remote)))
(host (session-get session 'host))
(port (store-export-channel session files)))
(format #t (_ "retrieving ~a files from '~a'...~%")
(length files) host)
(let ((pipe (remote-pipe session
`("guix" "archive" "--export" ,@files)
#:quote? #f)))
(and pipe
(with-store store
(guard (c ((nix-protocol-error? c)
(warning (_ "failed to import files from '~a': ~s~%")
host c)
#f))
(format (current-error-port) "retrieving ~a files from '~a'...~%"
(length files) host)
;; We cannot use the 'import-paths' RPC here because we already
;; hold the locks for FILES.
(restore-file-set pipe
#:log-port (current-error-port)
#:lock? #f)
(close-port pipe))))))
;; We cannot use the 'import-paths' RPC here because we already
;; hold the locks for FILES.
(let ((result (restore-file-set port
#:log-port (current-error-port)
#:lock? #f)))
(close-port port)
result)))
;;;