store-copy: 'populate-store' can optionally deduplicate files.

Until now deduplication was performed as an additional pass after
copying files, which involve re-traversing all the files that had just
been copied.

* guix/store/deduplication.scm (copy-file/deduplicate): New procedure.
* tests/store-deduplication.scm ("copy-file/deduplicate"): New test.
* guix/build/store-copy.scm (populate-store): Add #:deduplicate?
parameter and honor it.
* tests/gexp.scm ("gexp->derivation, store copy"): Pass #:deduplicate? #f
to 'populate-store'.
* gnu/build/image.scm (initialize-root-partition): Pass #:deduplicate?
to 'populate-store'.  Pass #:deduplicate? #f to 'register-closure'.
* gnu/build/vm.scm (root-partition-initializer): Likewise.
* gnu/build/install.scm (populate-single-profile-directory): Pass
 #:deduplicate? #f to 'populate-store'.
* gnu/build/linux-initrd.scm (build-initrd): Likewise.
* guix/scripts/pack.scm (self-contained-tarball)[import-module?]: New
procedure.
[build]: Pass it as an argument to 'source-module-closure'.
* guix/scripts/pack.scm (squashfs-image)[build]: Wrap in
'with-extensions'.
* gnu/system/linux-initrd.scm (expression->initrd)[import-module?]: New
procedure.
[builder]: Pass it to 'source-module-closure'.
* gnu/system/install.scm (cow-store-service-type)[import-module?]: New
procedure.  Pass it to 'source-module-closure'.
This commit is contained in:
Ludovic Courtès 2020-12-10 15:12:34 +01:00
parent dea1ee1fd7
commit 6a060ff27f
No known key found for this signature in database
GPG Key ID: 090B11993D9AEBB5
11 changed files with 196 additions and 128 deletions

View File

@ -186,7 +186,8 @@ rest of the store when registering the closures. SYSTEM-DIRECTORY is the name
of the directory of the 'system' derivation. Pass WAL-MODE? to
register-closure."
(populate-root-file-system system-directory root)
(populate-store references-graphs root)
(populate-store references-graphs root
#:deduplicate? deduplicate?)
;; Populate /dev.
(when make-device-nodes
@ -195,7 +196,7 @@ register-closure."
(when register-closures?
(for-each (lambda (closure)
(register-closure root closure
#:deduplicate? deduplicate?
#:deduplicate? #f
#:wal-mode? wal-mode?))
references-graphs))

View File

@ -214,7 +214,8 @@ This is used to create the self-contained tarballs with 'guix pack'."
(symlink old (scope new)))
;; Populate the store.
(populate-store (list closure) directory)
(populate-store (list closure) directory
#:deduplicate? #f)
(when database
(install-database-and-gc-roots directory database profile

View File

@ -127,7 +127,8 @@ REFERENCES-GRAPHS."
(mkdir "contents")
;; Copy the closures of all the items referenced in REFERENCES-GRAPHS.
(populate-store references-graphs "contents")
(populate-store references-graphs "contents"
#:deduplicate? #f)
(with-directory-excursion "contents"
;; Make '/init'.

View File

@ -395,7 +395,8 @@ system that is passed to 'populate-root-file-system'."
(when copy-closures?
;; Populate the store.
(populate-store (map (cut string-append "/xchg/" <>) closures)
target))
target
#:deduplicate? deduplicate?))
;; Populate /dev.
(make-device-nodes target)
@ -412,7 +413,7 @@ system that is passed to 'populate-root-file-system'."
(for-each (lambda (closure)
(register-closure target
(string-append "/xchg/" closure)
#:deduplicate? deduplicate?))
#:deduplicate? #f))
closures)
(unless copy-closures?
(umount target-store)))

View File

@ -1,5 +1,5 @@
;;; GNU Guix --- Functional package management for GNU
;;; Copyright © 2014, 2015, 2016, 2017, 2018, 2019 Ludovic Courtès <ludo@gnu.org>
;;; Copyright © 2014, 2015, 2016, 2017, 2018, 2019, 2020 Ludovic Courtès <ludo@gnu.org>
;;; Copyright © 2015 Mark H Weaver <mhw@netris.org>
;;; Copyright © 2016 Andreas Enge <andreas@enge.fr>
;;; Copyright © 2017 Marius Bakke <mbakke@fastmail.com>
@ -176,6 +176,13 @@ manual."
(shepherd-service-type
'cow-store
(lambda _
(define (import-module? module)
;; Since we don't use deduplication support in 'populate-store', don't
;; import (guix store deduplication) and its dependencies, which
;; includes Guile-Gcrypt.
(and (guix-module-name? module)
(not (equal? module '(guix store deduplication)))))
(shepherd-service
(requirement '(root-file-system user-processes))
(provision '(cow-store))
@ -190,7 +197,8 @@ the given target.")
,@%default-modules))
(start
(with-imported-modules (source-module-closure
'((gnu build install)))
'((gnu build install))
#:select? import-module?)
#~(case-lambda
((target)
(mount-cow-store target #$%backing-directory)

View File

@ -76,12 +76,20 @@ the derivations referenced by EXP are automatically copied to the initrd."
(define init
(program-file "init" exp #:guile guile))
(define (import-module? module)
;; Since we don't use deduplication support in 'populate-store', don't
;; import (guix store deduplication) and its dependencies, which includes
;; Guile-Gcrypt. That way we can run tests with '--bootstrap'.
(and (guix-module-name? module)
(not (equal? module '(guix store deduplication)))))
(define builder
;; Do not use "guile-zlib" extension here, otherwise it would drag the
;; non-static "zlib" package to the initrd closure. It is not needed
;; anyway because the modules are stored uncompressed within the initrd.
(with-imported-modules (source-module-closure
'((gnu build linux-initrd)))
'((gnu build linux-initrd))
#:select? import-module?)
#~(begin
(use-modules (gnu build linux-initrd))

View File

@ -20,6 +20,7 @@
#:use-module ((guix build utils) #:hide (copy-recursively))
#:use-module (guix sets)
#:use-module (guix progress)
#:autoload (guix store deduplication) (copy-file/deduplicate)
#:use-module (srfi srfi-1)
#:use-module (srfi srfi-9)
#:use-module (srfi srfi-26)
@ -242,10 +243,13 @@ permissions. Write verbose output to the LOG port."
lstat)))
(define* (populate-store reference-graphs target
#:key (log-port (current-error-port)))
#:key
(deduplicate? #t)
(log-port (current-error-port)))
"Populate the store under directory TARGET with the items specified in
REFERENCE-GRAPHS, a list of reference-graph files. Items copied to TARGET
maintain timestamps and permissions."
maintain timestamps and permissions. When DEDUPLICATE? is true, deduplicate
regular files as they are copied to TARGET."
(define store
(string-append target (%store-directory)))
@ -273,6 +277,11 @@ maintain timestamps and permissions."
(string-append target thing)
#:keep-mtime? #t
#:keep-permissions? #t
#:copy-file
(if deduplicate?
(cut copy-file/deduplicate <> <>
#:store store)
copy-file)
#:log (%make-void-port "w"))
(report))
things)))))

View File

@ -203,12 +203,19 @@ added to the pack."
#+(file-append glibc-utf8-locales "/lib/locale"))
(setlocale LC_ALL "en_US.utf8"))))
(define (import-module? module)
;; Since we don't use deduplication support in 'populate-store', don't
;; import (guix store deduplication) and its dependencies, which includes
;; Guile-Gcrypt. That way we can run tests with '--bootstrap'.
(and (not-config? module)
(not (equal? '(guix store deduplication) module))))
(define build
(with-imported-modules (source-module-closure
`((guix build utils)
(guix build union)
(gnu build install))
#:select? not-config?)
#:select? import-module?)
#~(begin
(use-modules (guix build utils)
((guix build union) #:select (relative-file-name))
@ -382,138 +389,139 @@ added to the pack."
`(("/bin" -> "bin") ,@symlinks)))
(define build
(with-imported-modules (source-module-closure
'((guix build utils)
(guix build store-copy)
(guix build union)
(gnu build install))
#:select? not-config?)
#~(begin
(use-modules (guix build utils)
(guix build store-copy)
((guix build union) #:select (relative-file-name))
(gnu build install)
(srfi srfi-1)
(srfi srfi-26)
(ice-9 match))
(with-extensions (list guile-gcrypt)
(with-imported-modules (source-module-closure
'((guix build utils)
(guix build store-copy)
(guix build union)
(gnu build install))
#:select? not-config?)
#~(begin
(use-modules (guix build utils)
(guix build store-copy)
((guix build union) #:select (relative-file-name))
(gnu build install)
(srfi srfi-1)
(srfi srfi-26)
(ice-9 match))
(define database #+database)
(define entry-point #$entry-point)
(define database #+database)
(define entry-point #$entry-point)
(define (mksquashfs args)
(apply invoke "mksquashfs"
`(,@args
(define (mksquashfs args)
(apply invoke "mksquashfs"
`(,@args
;; Do not create a "recovery file" when appending to the
;; file system since it's useless in this case.
"-no-recovery"
;; Do not create a "recovery file" when appending to the
;; file system since it's useless in this case.
"-no-recovery"
;; Do not attempt to store extended attributes.
;; See <https://bugs.gnu.org/40043>.
"-no-xattrs"
;; Do not attempt to store extended attributes.
;; See <https://bugs.gnu.org/40043>.
"-no-xattrs"
;; Set file times and the file system creation time to
;; one second after the Epoch.
"-all-time" "1" "-mkfs-time" "1"
;; Set file times and the file system creation time to
;; one second after the Epoch.
"-all-time" "1" "-mkfs-time" "1"
;; Reset all UIDs and GIDs.
"-force-uid" "0" "-force-gid" "0")))
;; Reset all UIDs and GIDs.
"-force-uid" "0" "-force-gid" "0")))
(setenv "PATH" #+(file-append archiver "/bin"))
(setenv "PATH" #+(file-append archiver "/bin"))
;; We need an empty file in order to have a valid file argument when
;; we reparent the root file system. Read on for why that's
;; necessary.
(with-output-to-file ".empty" (lambda () (display "")))
;; We need an empty file in order to have a valid file argument when
;; we reparent the root file system. Read on for why that's
;; necessary.
(with-output-to-file ".empty" (lambda () (display "")))
;; Create the squashfs image in several steps.
;; Add all store items. Unfortunately mksquashfs throws away all
;; ancestor directories and only keeps the basename. We fix this
;; in the following invocations of mksquashfs.
(mksquashfs `(,@(map store-info-item
(call-with-input-file "profile"
read-reference-graph))
#$environment
,#$output
;; Create the squashfs image in several steps.
;; Add all store items. Unfortunately mksquashfs throws away all
;; ancestor directories and only keeps the basename. We fix this
;; in the following invocations of mksquashfs.
(mksquashfs `(,@(map store-info-item
(call-with-input-file "profile"
read-reference-graph))
#$environment
,#$output
;; Do not perform duplicate checking because we
;; don't have any dupes.
"-no-duplicates"
"-comp"
,#+(compressor-name compressor)))
;; Do not perform duplicate checking because we
;; don't have any dupes.
"-no-duplicates"
"-comp"
,#+(compressor-name compressor)))
;; Here we reparent the store items. For each sub-directory of
;; the store prefix we need one invocation of "mksquashfs".
(for-each (lambda (dir)
(mksquashfs `(".empty"
,#$output
"-root-becomes" ,dir)))
(reverse (string-tokenize (%store-directory)
(char-set-complement (char-set #\/)))))
;; Here we reparent the store items. For each sub-directory of
;; the store prefix we need one invocation of "mksquashfs".
(for-each (lambda (dir)
(mksquashfs `(".empty"
,#$output
"-root-becomes" ,dir)))
(reverse (string-tokenize (%store-directory)
(char-set-complement (char-set #\/)))))
;; Add symlinks and mount points.
(mksquashfs
`(".empty"
,#$output
;; Create SYMLINKS via pseudo file definitions.
,@(append-map
(match-lambda
((source '-> target)
;; Create relative symlinks to work around a bug in
;; Singularity 2.x:
;; https://bugs.gnu.org/34913
;; https://github.com/sylabs/singularity/issues/1487
(let ((target (string-append #$profile "/" target)))
(list "-p"
(string-join
;; name s mode uid gid symlink
(list source
"s" "777" "0" "0"
(relative-file-name (dirname source)
target)))))))
'#$symlinks*)
;; Add symlinks and mount points.
(mksquashfs
`(".empty"
,#$output
;; Create SYMLINKS via pseudo file definitions.
,@(append-map
(match-lambda
((source '-> target)
;; Create relative symlinks to work around a bug in
;; Singularity 2.x:
;; https://bugs.gnu.org/34913
;; https://github.com/sylabs/singularity/issues/1487
(let ((target (string-append #$profile "/" target)))
(list "-p"
(string-join
;; name s mode uid gid symlink
(list source
"s" "777" "0" "0"
(relative-file-name (dirname source)
target)))))))
'#$symlinks*)
"-p" "/.singularity.d d 555 0 0"
"-p" "/.singularity.d d 555 0 0"
;; Create the environment file.
"-p" "/.singularity.d/env d 555 0 0"
"-p" ,(string-append
"/.singularity.d/env/90-environment.sh s 777 0 0 "
(relative-file-name "/.singularity.d/env"
#$environment))
;; Create the environment file.
"-p" "/.singularity.d/env d 555 0 0"
"-p" ,(string-append
"/.singularity.d/env/90-environment.sh s 777 0 0 "
(relative-file-name "/.singularity.d/env"
#$environment))
;; Create /.singularity.d/actions, and optionally the 'run'
;; script, used by 'singularity run'.
"-p" "/.singularity.d/actions d 555 0 0"
;; Create /.singularity.d/actions, and optionally the 'run'
;; script, used by 'singularity run'.
"-p" "/.singularity.d/actions d 555 0 0"
,@(if entry-point
`(;; This one if for Singularity 2.x.
"-p"
,(string-append
"/.singularity.d/actions/run s 777 0 0 "
(relative-file-name "/.singularity.d/actions"
(string-append #$profile "/"
entry-point)))
,@(if entry-point
`( ;; This one if for Singularity 2.x.
"-p"
,(string-append
"/.singularity.d/actions/run s 777 0 0 "
(relative-file-name "/.singularity.d/actions"
(string-append #$profile "/"
entry-point)))
;; This one is for Singularity 3.x.
"-p"
,(string-append
"/.singularity.d/runscript s 777 0 0 "
(relative-file-name "/.singularity.d"
(string-append #$profile "/"
entry-point))))
'())
;; This one is for Singularity 3.x.
"-p"
,(string-append
"/.singularity.d/runscript s 777 0 0 "
(relative-file-name "/.singularity.d"
(string-append #$profile "/"
entry-point))))
'())
;; Create empty mount points.
"-p" "/proc d 555 0 0"
"-p" "/sys d 555 0 0"
"-p" "/dev d 555 0 0"
"-p" "/home d 555 0 0"))
;; Create empty mount points.
"-p" "/proc d 555 0 0"
"-p" "/sys d 555 0 0"
"-p" "/dev d 555 0 0"
"-p" "/home d 555 0 0"))
(when database
;; Initialize /var/guix.
(install-database-and-gc-roots "var-etc" database #$profile)
(mksquashfs `("var-etc" ,#$output))))))
(when database
;; Initialize /var/guix.
(install-database-and-gc-roots "var-etc" database #$profile)
(mksquashfs `("var-etc" ,#$output)))))))
(gexp->derivation (string-append name
(compressor-extension compressor)

View File

@ -34,7 +34,8 @@
#:use-module (guix serialization)
#:export (nar-sha256
deduplicate
dump-file/deduplicate))
dump-file/deduplicate
copy-file/deduplicate))
;; XXX: This port is used as a workaround on Guile <= 2.2.4 where
;; 'port-position' throws to 'out-of-range' when the offset is great than or
@ -256,3 +257,16 @@ down the road."
(get-hash)))))
(deduplicate file hash #:store store))
(define* (copy-file/deduplicate source target
#:key (store (%store-directory)))
"Like 'copy-file', but additionally deduplicate TARGET in STORE."
(call-with-input-file source
(lambda (input)
(let ((stat (stat input)))
(dump-file/deduplicate target input (stat:size stat)
(if (zero? (logand (stat:mode stat)
#o100))
'regular
'executable)
#:store store)))))

View File

@ -736,7 +736,8 @@
(zero? (logand #o222 (stat:mode st)))))))
(mkdir #$output)
(populate-store '("graph") #$output)
(populate-store '("graph") #$output
#:deduplicate? #f)
;; Check whether 'populate-store' canonicalizes
;; permissions and timestamps.

View File

@ -1,5 +1,5 @@
;;; GNU Guix --- Functional package management for GNU
;;; Copyright © 2018 Ludovic Courtès <ludo@gnu.org>
;;; Copyright © 2018, 2020 Ludovic Courtès <ludo@gnu.org>
;;;
;;; This file is part of GNU Guix.
;;;
@ -25,6 +25,7 @@
#:use-module (rnrs bytevectors)
#:use-module (ice-9 binary-ports)
#:use-module (srfi srfi-1)
#:use-module (srfi srfi-26)
#:use-module (srfi srfi-64))
(test-begin "store-deduplication")
@ -106,4 +107,19 @@
(cons (apply = (map (compose stat:ino stat) identical))
(map (compose stat:nlink stat) identical))))))
(test-assert "copy-file/deduplicate"
(call-with-temporary-directory
(lambda (store)
(let ((source (search-path %load-path "gnu/packages/emacs-xyz.scm")))
(for-each (lambda (target)
(copy-file/deduplicate source
(string-append store target)
#:store store))
'("/a" "/b" "/c"))
(and (directory-exists? (string-append store "/.links"))
(file=? source (string-append store "/a"))
(apply = (map (compose stat:ino stat
(cut string-append store <>))
'("/a" "/b" "/c"))))))))
(test-end "store-deduplication")