pack: Prevent duplicate files in tar archives.

Tar translate duplicate files in the archive into hard links.  These can cause
problems, as not every tool support them; for example dpkg doesn't.

* gnu/system/file-systems.scm (reduce-directories): New procedure.
(file-prefix?): Lift the restriction on file prefix.  The procedure can be
useful for comparing relative file names.  Adjust doc.
(file-name-depth): New procedure, extracted from ...
(btrfs-store-subvolume-file-name): ... here.
* guix/scripts/pack.scm (self-contained-tarball/builder): Use
reduce-directories.
* tests/file-systems.scm ("reduce-directories"): New test.
This commit is contained in:
Maxim Cournoyer 2021-06-17 01:22:35 -04:00
parent 6b0e55cde9
commit 4f3bdc8f21
No known key found for this signature in database
GPG Key ID: 1260E46482E63562
3 changed files with 48 additions and 21 deletions

View File

@ -55,6 +55,7 @@
file-system-dependencies
file-system-location
reduce-directories
file-system-type-predicate
btrfs-subvolume?
btrfs-store-subvolume-file-name
@ -231,8 +232,8 @@
(char-set-complement (char-set #\/)))
(define (file-prefix? file1 file2)
"Return #t if FILE1 denotes the name of a file that is a parent of FILE2,
where both FILE1 and FILE2 are absolute file name. For example:
"Return #t if FILE1 denotes the name of a file that is a parent of FILE2.
For example:
(file-prefix? \"/gnu\" \"/gnu/store\")
=> #t
@ -240,19 +241,41 @@ where both FILE1 and FILE2 are absolute file name. For example:
(file-prefix? \"/gn\" \"/gnu/store\")
=> #f
"
(and (string-prefix? "/" file1)
(string-prefix? "/" file2)
(let loop ((file1 (string-tokenize file1 %not-slash))
(file2 (string-tokenize file2 %not-slash)))
(match file1
(()
#t)
((head1 tail1 ...)
(match file2
((head2 tail2 ...)
(and (string=? head1 head2) (loop tail1 tail2)))
(()
#f)))))))
(let loop ((file1 (string-tokenize file1 %not-slash))
(file2 (string-tokenize file2 %not-slash)))
(match file1
(()
#t)
((head1 tail1 ...)
(match file2
((head2 tail2 ...)
(and (string=? head1 head2) (loop tail1 tail2)))
(()
#f))))))
(define (file-name-depth file-name)
(length (string-tokenize file-name %not-slash)))
(define (reduce-directories file-names)
"Eliminate entries in FILE-NAMES that are children of other entries in
FILE-NAMES. This is for example useful when passing a list of files to GNU
tar, which would otherwise descend into each directory passed and archive the
duplicate files as hard links, which can be undesirable."
(let* ((file-names/sorted
;; Ascending sort by file hierarchy depth, then by file name length.
(stable-sort (delete-duplicates file-names)
(lambda (f1 f2)
(let ((depth1 (file-name-depth f1))
(depth2 (file-name-depth f2)))
(if (= depth1 depth2)
(string< f1 f2)
(< depth1 depth2)))))))
(reverse (fold (lambda (file-name results)
(if (find (cut file-prefix? <> file-name) results)
results ;parent found -- skipping
(cons file-name results)))
'()
file-names/sorted))))
(define* (file-system-device->string device #:key uuid-type)
"Return the string representations of the DEVICE field of a <file-system>
@ -624,9 +647,6 @@ store is located, else #f."
s
(string-append "/" s)))
(define (file-name-depth file-name)
(length (string-tokenize file-name %not-slash)))
(and-let* ((btrfs-subvolume-fs (filter btrfs-subvolume? file-systems))
(btrfs-subvolume-fs*
(sort btrfs-subvolume-fs

View File

@ -230,13 +230,15 @@ its source property."
`((guix build pack)
(guix build utils)
(guix build union)
(gnu build install))
(gnu build install)
(gnu system file-systems))
#:select? import-module?)
#~(begin
(use-modules (guix build pack)
(guix build utils)
((guix build union) #:select (relative-file-name))
(gnu build install)
((gnu system file-systems) #:select (reduce-directories))
(srfi srfi-1)
(srfi srfi-26)
(ice-9 match))
@ -303,7 +305,7 @@ its source property."
,(string-append "." (%store-directory))
,@(delete-duplicates
,@(reduce-directories
(filter-map (match-lambda
(('directory directory)
(string-append "." directory))

View File

@ -1,6 +1,6 @@
;;; GNU Guix --- Functional package management for GNU
;;; Copyright © 2015, 2017 Ludovic Courtès <ludo@gnu.org>
;;; Copyright © 2020 Maxim Cournoyer <maxim.cournoyer@gmail.com>
;;; Copyright © 2020, 2021 Maxim Cournoyer <maxim.cournoyer@gmail.com>
;;;
;;; This file is part of GNU Guix.
;;;
@ -50,6 +50,11 @@
(device "/foo")
(flags '(bind-mount read-only)))))))))
(test-equal "reduce-directories"
'("./opt/gnu/" "./opt/gnuism" "a/b/c")
(reduce-directories '("./opt/gnu/etc" "./opt/gnu/" "./opt/gnu/bin"
"./opt/gnu/lib/debug" "./opt/gnuism" "a/b/c" "a/b/c")))
(test-assert "does not pull (guix config)"
;; This module is meant both for the host side and "build side", so make
;; sure it doesn't pull in (guix config), which depends on the user's