profiles: Do not repeat entries in 'manifest' file.

Fixes <https://issues.guix.gnu.org/55499>.
Reported by Ricardo Wurmus <rekado@elephly.net>.

With this change, the manifest file created for:

  guix install r r-seurat r-cistopic r-monocle3 r-cicero-monocle3 r-assertthat

goes from 5.7M to 176K.  Likewise, on this profile, wall-clock time of:

  GUIX_PROFILING=gc guix package -I

goes from 0.7s to 0.1s, with heap usage going from 55M to 9M.

* guix/profiles.scm (manifest->gexp)[optional]: New procedure.
[entry->gexp]: Turn into a monadic procedure.  Return a 'repeated' sexp
if ENTRY was already visited before.
Adjust caller accordingly.  Bump manifest version.
(sexp->manifest)[sexp->manifest-entry]: Turn into a monadic procedure.
Add case for 'repeated' nodes.  Add each entry to the current state
vhash.
Add clause for version 4 manifests.
[sexp->manifest-entry/v3]: New procedure, with former
'sexp->manifest-entry' code.
* tests/profiles.scm ("deduplication of repeated entries"): New test.
* guix/build/profiles.scm (manifest-sexp->inputs+search-paths)[let-fields]:
New macro.
Use it.  Expect version 4.  Add clause for 'repeated' nodes.
This commit is contained in:
Ludovic Courtès 2022-05-31 17:17:10 +02:00
parent 9b8c442b25
commit 4ff12d1de7
No known key found for this signature in database
GPG Key ID: 090B11993D9AEBB5
3 changed files with 181 additions and 32 deletions

View File

@ -149,19 +149,33 @@ instead make DIRECTORY a \"real\" directory containing symlinks."
"Parse MANIFEST, an sexp as produced by 'manifest->gexp', and return two
values: the list of store items of its manifest entries, and the list of
search path specifications."
(define-syntax let-fields
(syntax-rules ()
;; Bind the fields NAME of LST to same-named variables in the lexical
;; scope of BODY.
((_ lst (name rest ...) body ...)
(let ((name (match (assq 'name lst)
((_ value) value)
(#f '()))))
(let-fields lst (rest ...) body ...)))
((_ lst () body ...)
(begin body ...))))
(match manifest ;this must match 'manifest->gexp'
(('manifest ('version 3)
(('manifest ('version 4)
('packages (entries ...)))
(let loop ((entries entries)
(inputs '())
(search-paths '()))
(match entries
(((name version output item
('propagated-inputs deps)
('search-paths paths) _ ...) . rest)
(loop (append rest deps) ;breadth-first traversal
(cons item inputs)
(append paths search-paths)))
(((name version output item fields ...) . rest)
(let ((paths search-paths))
(let-fields fields (propagated-inputs search-paths properties)
(loop (append rest propagated-inputs) ;breadth-first traversal
(cons item inputs)
(append search-paths paths)))))
((('repeated name version item) . rest)
(loop rest inputs search-paths))
(()
(values (reverse inputs)
(delete-duplicates
@ -212,4 +226,8 @@ search paths of MANIFEST's entries."
;; Write 'OUTPUT/etc/profile'.
(build-etc/profile output search-paths)))
;;; Local Variables:
;;; eval: (put 'let-fields 'scheme-indent-function 2)
;;; End:
;;; profile.scm ends here

View File

@ -454,32 +454,58 @@ denoting a specific output of a package."
(define (manifest->gexp manifest)
"Return a representation of MANIFEST as a gexp."
(define (optional name value)
(if (null? value)
#~()
#~((#$name #$value))))
(define (entry->gexp entry)
(match entry
(($ <manifest-entry> name version output (? string? path)
(deps ...) (search-paths ...) _ (properties ...))
#~(#$name #$version #$output #$path
(propagated-inputs #$(map entry->gexp deps))
(search-paths #$(map search-path-specification->sexp
search-paths))
#$@(if (null? properties)
#~()
#~((properties . #$properties)))))
(($ <manifest-entry> name version output package
(deps ...) (search-paths ...) _ (properties ...))
#~(#$name #$version #$output
(ungexp package (or output "out"))
(propagated-inputs #$(map entry->gexp deps))
(search-paths #$(map search-path-specification->sexp
search-paths))
#$@(if (null? properties)
#~()
#~((properties . #$properties)))))))
;; Maintain in state monad a vhash of visited entries, indexed by their
;; item, usually package objects (we cannot use the entry itself as an
;; index since identical entries are usually not 'eq?'). Use that vhash
;; to avoid repeating duplicate entries. This is particularly useful in
;; the presence of propagated inputs, where we could otherwise end up
;; repeating large trees.
(mlet %state-monad ((visited (current-state)))
(if (match (vhash-assq (manifest-entry-item entry) visited)
((_ . previous-entry)
(manifest-entry=? previous-entry entry))
(#f #f))
(return #~(repeated #$(manifest-entry-name entry)
#$(manifest-entry-version entry)
(ungexp (manifest-entry-item entry)
(manifest-entry-output entry))))
(mbegin %state-monad
(set-current-state (vhash-consq (manifest-entry-item entry)
entry visited))
(mlet %state-monad ((deps (mapm %state-monad entry->gexp
(manifest-entry-dependencies entry))))
(return
(match entry
(($ <manifest-entry> name version output (? string? path)
(_ ...) (search-paths ...) _ (properties ...))
#~(#$name #$version #$output #$path
#$@(optional 'propagated-inputs deps)
#$@(optional 'search-paths
(map search-path-specification->sexp
search-paths))
#$@(optional 'properties properties)))
(($ <manifest-entry> name version output package
(_deps ...) (search-paths ...) _ (properties ...))
#~(#$name #$version #$output
(ungexp package (or output "out"))
#$@(optional 'propagated-inputs deps)
#$@(optional 'search-paths
(map search-path-specification->sexp
search-paths))
#$@(optional 'properties properties))))))))))
(match manifest
(($ <manifest> (entries ...))
#~(manifest (version 3)
(packages #$(map entry->gexp entries))))))
#~(manifest (version 4)
(packages #$(run-with-state
(mapm %state-monad entry->gexp entries)
vlist-null))))))
(define (find-package name version)
"Return a package from the distro matching NAME and possibly VERSION. This
@ -520,14 +546,15 @@ procedure is here for backward-compatibility and will eventually vanish."
(item item)
(parent parent))))
(define* (sexp->manifest-entry sexp #:optional (parent (delay #f)))
(define* (sexp->manifest-entry/v3 sexp #:optional (parent (delay #f)))
;; Read SEXP as a version 3 manifest entry.
(match sexp
((name version output path
('propagated-inputs deps)
('search-paths search-paths)
extra-stuff ...)
;; For each of DEPS, keep a promise pointing to ENTRY.
(letrec* ((deps* (map (cut sexp->manifest-entry <> (delay entry))
(letrec* ((deps* (map (cut sexp->manifest-entry/v3 <> (delay entry))
deps))
(entry (manifest-entry
(name name)
@ -542,6 +569,56 @@ procedure is here for backward-compatibility and will eventually vanish."
'())))))
entry))))
(define-syntax let-fields
(syntax-rules ()
;; Bind the fields NAME of LST to same-named variables in the lexical
;; scope of BODY.
((_ lst (name rest ...) body ...)
(let ((name (match (assq 'name lst)
((_ value) value)
(#f '()))))
(let-fields lst (rest ...) body ...)))
((_ lst () body ...)
(begin body ...))))
(define* (sexp->manifest-entry sexp #:optional (parent (delay #f)))
(match sexp
(('repeated name version path)
;; This entry is the same as another one encountered earlier; look it
;; up and return it.
(mlet %state-monad ((visited (current-state))
(key -> (list name version path)))
(match (vhash-assoc key visited)
(#f
(raise (formatted-message
(G_ "invalid repeated entry in profile: ~s")
sexp)))
((_ . entry)
(return entry)))))
((name version output path fields ...)
(let-fields fields (propagated-inputs search-paths properties)
(mlet* %state-monad
((entry -> #f)
(deps (mapm %state-monad
(cut sexp->manifest-entry <> (delay entry))
propagated-inputs))
(visited (current-state))
(key -> (list name version path)))
(set! entry ;XXX: emulate 'letrec*'
(manifest-entry
(name name)
(version version)
(output output)
(item path)
(dependencies deps)
(search-paths (map sexp->search-path-specification
search-paths))
(parent parent)
(properties properties)))
(mbegin %state-monad
(set-current-state (vhash-cons key entry visited))
(return entry)))))))
(match sexp
(('manifest ('version 0)
('packages ((name version output path) ...)))
@ -608,7 +685,15 @@ procedure is here for backward-compatibility and will eventually vanish."
;; Version 3 represents DEPS as full-blown manifest entries.
(('manifest ('version 3 minor-version ...)
('packages (entries ...)))
(manifest (map sexp->manifest-entry entries)))
(manifest (map sexp->manifest-entry/v3 entries)))
;; Version 4 deduplicates repeated entries and makes manifest entry fields
;; such as 'propagated-inputs' and 'search-paths' optional.
(('manifest ('version 4 minor-version ...)
('packages (entries ...)))
(manifest (run-with-state
(mapm %state-monad sexp->manifest-entry entries)
vlist-null)))
(_
(raise (condition
(&message (message "unsupported manifest format")))))))
@ -2317,4 +2402,8 @@ PROFILE refers to, directly or indirectly, or PROFILE."
%known-shorthand-profiles)
profile))
;;; Local Variables:
;;; eval: (put 'let-fields 'scheme-indent-function 2)
;;; End:
;;; profiles.scm ends here

View File

@ -586,6 +586,48 @@
#:locales? #f)))
(return #f)))))
(test-assertm "deduplication of repeated entries"
;; Make sure the 'manifest' file does not duplicate identical entries.
;; See <https://issues.guix.gnu.org/55499>.
(mlet* %store-monad ((p0 -> (dummy-package "p0"
(build-system trivial-build-system)
(arguments
`(#:guile ,%bootstrap-guile
#:builder (mkdir (assoc-ref %outputs "out"))))
(propagated-inputs
`(("guile" ,%bootstrap-guile)))))
(p1 -> (package
(inherit p0)
(name "p1")))
(drv (profile-derivation (packages->manifest
(list p0 p1))
#:hooks '()
#:locales? #f)))
(mbegin %store-monad
(built-derivations (list drv))
(let ((file (string-append (derivation->output-path drv)
"/manifest"))
(manifest (profile-manifest (derivation->output-path drv))))
(define (contains-repeated? sexp)
(match sexp
(('repeated _ ...) #t)
((lst ...) (any contains-repeated? sexp))
(_ #f)))
(return (and (contains-repeated? (call-with-input-file file read))
;; MANIFEST has two entries for %BOOTSTRAP-GUILE since
;; it's propagated both from P0 and from P1. When
;; reading a 'repeated' node, 'read-manifest' should
;; reuse the previously-read entry so the two
;; %BOOTSTRAP-GUILE entries must be 'eq?'.
(match (manifest-entries manifest)
(((= manifest-entry-dependencies (dep0))
(= manifest-entry-dependencies (dep1)))
(and (string=? (manifest-entry-name dep0)
(package-name %bootstrap-guile))
(eq? dep0 dep1))))))))))
(test-assertm "no collision"
;; Here we have an entry that is "lowered" (its 'item' field is a store file
;; name) and another entry (its 'item' field is a package) that is