substitute: Choose compression method based on past CPU usage.

This stems from the observation that substitute download can be
CPU-bound when high-speed networks are in use:

  https://lists.gnu.org/archive/html/guix-devel/2020-12/msg00177.html

* guix/narinfo.scm (decompresses-faster?): New procedure.
(narinfo-best-uri): Add #:fast-decompression?.
* guix/scripts/substitute.scm (%prefer-fast-decompression?): New
variable.
(call-with-cpu-usage-monitoring): New procedure.
(with-cpu-usage-monitoring): New macro.
(display-narinfo-data, process-substitution): Pass #:fast-decompression?
to 'narinfo-best-uri'.
(process-substitution): Wrap 'restore-file' call in
'with-cpu-usage-monitoring'.  Set '%prefer-fast-decompression?'.
This commit is contained in:
Ludovic Courtès 2021-03-14 15:05:30 +01:00
parent 5f9b28b231
commit 9da5ec7099
No known key found for this signature in database
GPG Key ID: 090B11993D9AEBB5
2 changed files with 73 additions and 11 deletions

View File

@ -1,5 +1,5 @@
;;; GNU Guix --- Functional package management for GNU
;;; Copyright © 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020 Ludovic Courtès <ludo@gnu.org>
;;; Copyright © 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021 Ludovic Courtès <ludo@gnu.org>
;;; Copyright © 2014 Nikita Karetnikov <nikita@karetnikov.org>
;;; Copyright © 2018 Kyle Meyer <kyle@kyleam.com>
;;;
@ -297,9 +297,21 @@ this is a rough approximation."
(_ (or (string=? compression2 "none")
(string=? compression2 "gzip")))))
(define (narinfo-best-uri narinfo)
(define (decompresses-faster? compression1 compression2)
  "Return true if COMPRESSION1, a compression method name such as \"zstd\",
is considered to generally decompress with a higher throughput than
COMPRESSION2.  \"none\" and \"zstd\" are always reported as faster, whatever
COMPRESSION2 is; \"gzip\" is reported as faster than \"lzip\" only; any other
COMPRESSION1 yields #f."
  (cond ((member compression1 '("none" "zstd"))
         ;; Top tier: COMPRESSION2 is not even looked at.
         #t)
        ((equal? compression1 "gzip")
         (string=? compression2 "lzip"))
        (else
         #f)))
(define* (narinfo-best-uri narinfo #:key fast-decompression?)
"Select the \"best\" URI to download NARINFO's nar, and return three values:
the URI, its compression method (a string), and the compressed file size."
the URI, its compression method (a string), and the compressed file size.
When FAST-DECOMPRESSION? is true, prefer substitutes with faster
decompression (typically zstd) rather than substitutes with a higher
compression ratio (typically lzip)."
(define choices
(filter (match-lambda
((uri compression file-size)
@ -321,6 +333,13 @@ the URI, its compression method (a string), and the compressed file size."
(compresses-better? compression1 compression2))))
(_ #f))) ;we can't tell
(match (sort choices file-size<?)
(define (speed<? c1 c2)
  ;; C1 and C2 are choices of the form (URI COMPRESSION . REST).  Return
  ;; true when 'decompresses-faster?' reports C2's compression method as
  ;; faster than C1's.
  (match (list c1 c2)
    (((_ compression1 . _) (_ compression2 . _))
     (decompresses-faster? compression2 compression1))))
(match (sort choices (if fast-decompression? (negate speed<?) file-size<?))
(((uri compression file-size) _ ...)
(values uri compression file-size))))

View File

@ -258,6 +258,27 @@ Internal tool to substitute a pre-built binary to a local build.\n"))
;;; Daemon/substituter protocol.
;;;
(define %prefer-fast-decompression?
;; Whether to prefer fast decompression over good compression ratios.  This
;; serves in particular to choose between lzip (high compression ratio but
;; low decompression throughput) and zstd (lower compression ratio but high
;; decompression throughput).
;;
;; The value is passed as #:fast-decompression? to 'narinfo-best-uri'.  It
;; starts out false, and 'process-substitution' changes it with 'set!'
;; according to the CPU usage measured while unpacking a nar.
#f)
(define (call-with-cpu-usage-monitoring proc)
  "Call PROC, a thunk, and return the CPU usage observed during the call: the
user CPU time consumed divided by the elapsed clock time, both as reported by
'times'.  The result of PROC is discarded.  Return 0 when the clock did not
advance during the call."
  (define start (times))
  (proc)
  (let* ((end           (times))
         (elapsed-ticks (- (tms:clock end) (tms:clock start)))
         (user-ticks    (- (tms:utime end) (tms:utime start))))
    (if (zero? elapsed-ticks)
        0                               ;avoid dividing by zero
        ;; The trailing 1. turns the exact ratio into a flonum.
        (/ user-ticks elapsed-ticks 1.))))
(define-syntax with-cpu-usage-monitoring
  (syntax-rules ()
    "Evaluate EXP... within a thunk passed to
'call-with-cpu-usage-monitoring'.  Return the CPU usage of that evaluation as
a fraction between 0 and 1, not the value of EXP."
    ((_ exp ...)
     (call-with-cpu-usage-monitoring
      (lambda ()
        exp ...)))))
(define (display-narinfo-data narinfo)
"Write to the current output port the contents of NARINFO in the format
expected by the daemon."
@ -270,7 +291,10 @@ expected by the daemon."
(for-each (cute format #t "~a/~a~%" (%store-prefix) <>)
(narinfo-references narinfo))
(let-values (((uri compression file-size) (narinfo-best-uri narinfo)))
(let-values (((uri compression file-size)
(narinfo-best-uri narinfo
#:fast-decompression?
%prefer-fast-decompression?)))
(format #t "~a\n~a\n"
(or file-size 0)
(or (narinfo-size narinfo) 0))))
@ -462,7 +486,9 @@ the current output port."
store-item))
(let-values (((uri compression file-size)
(narinfo-best-uri narinfo)))
(narinfo-best-uri narinfo
#:fast-decompression?
%prefer-fast-decompression?)))
(unless print-build-trace?
(format (current-error-port)
(G_ "Downloading ~a...~%") (uri->string uri)))
@ -500,11 +526,28 @@ the current output port."
((hashed get-hash)
(open-hash-input-port algorithm input)))
;; Unpack the Nar at INPUT into DESTINATION.
(restore-file hashed destination
#:dump-file (if (and destination-in-store?
deduplicate?)
dump-file/deduplicate*
dump-file))
(define cpu-usage
(with-cpu-usage-monitoring
(restore-file hashed destination
#:dump-file (if (and destination-in-store?
deduplicate?)
dump-file/deduplicate*
dump-file))))
;; Create a hysteresis: depending on CPU usage, favor compression
;; methods with faster decompression (like zstd) or methods with better
;; compression ratios (like lzip). This stems from the observation that
;; substitution can be CPU-bound when high-speed networks are used:
;; <https://lists.gnu.org/archive/html/guix-devel/2020-12/msg00177.html>.
;; To simulate "slow" networking or changing conditions, run:
;; sudo tc qdisc add dev eno1 root tbf rate 512kbit latency 50ms burst 1540
;; and then cancel with:
;; sudo tc qdisc del dev eno1 root
(when (> cpu-usage .8)
(set! %prefer-fast-decompression? #t))
(when (< cpu-usage .2)
(set! %prefer-fast-decompression? #f))
(close-port hashed)
(close-port input)