From 43dd92024ab54921fbf85285d04344cb2c6e77cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludovic=20Court=C3=A8s?= Date: Mon, 2 Sep 2013 23:03:03 +0200 Subject: [PATCH] union: Don't traverse sub-directories only found in one element of the union. This significantly reduces I/O when building profiles, especially with lots of package-specific sub-directories (such as 'share/emacs/24.3', 'texmf', etc.) * guix/build/union.scm (union-build)[file-tree](others-have-it?): New procedure. Use it in the 'enter?' parameter of 'file-system-fold'; change 'skip' parameter accordingly. * tests/union.scm ("union-build"): Ensure that 'include' is a symlink and 'bin' is a directory. --- guix/build/union.scm | 29 +++++++++++++++++++++++++---- tests/union.scm | 12 +++++++++++- 2 files changed, 36 insertions(+), 5 deletions(-) diff --git a/guix/build/union.scm b/guix/build/union.scm index 275746d83e..077b7fe530 100644 --- a/guix/build/union.scm +++ b/guix/build/union.scm @@ -105,7 +105,22 @@ single leaf." the DIRECTORIES." (define (file-tree dir) ;; Return the contents of DIR as a tree. - (match (file-system-fold (const #t) + + (define (others-have-it? subdir) + ;; Return #t if other elements of DIRECTORIES have SUBDIR. + (let ((subdir (substring subdir (string-length dir)))) + (any (lambda (other) + (and (not (string=? other dir)) + (file-exists? (string-append other "/" subdir)))) + directories))) + + (match (file-system-fold (lambda (subdir stat result) ; enter? + ;; No need to traverse DIR since there's + ;; nothing to union it with. Thus, we avoid + ;; creating a gazillon symlinks (think + ;; share/emacs/24.3, share/texmf, etc.) + (or (string=? subdir dir) + (others-have-it? subdir))) (lambda (file stat result) ; leaf (match result (((siblings ...) rest ...) @@ -117,7 +132,12 @@ the DIRECTORIES." (((leaves ...) (siblings ...) rest ...) `(((,(basename dir) ,@leaves) ,@siblings) ,@rest)))) - (const #f) ; skip + (lambda (dir stat result) ; skip + ;; DIR is not available elsewhere, so treat it + ;; as a leaf. + (match result + (((siblings ...) rest ...) + `((,dir ,@siblings) ,@rest)))) (lambda (file stat errno result) (format (current-error-port) "union-build: ~a: ~a~%" file (strerror errno))) @@ -158,8 +178,9 @@ the DIRECTORIES." (mkdir output) (let loop ((tree (delete-duplicate-leaves (cons "." - (tree-union (append-map (compose tree-leaves file-tree) - directories))) + (tree-union + (append-map (compose tree-leaves file-tree) + (delete-duplicates directories)))) leaf=? resolve-collision)) (dir '())) diff --git a/tests/union.scm b/tests/union.scm index 9816882101..6287cffc38 100644 --- a/tests/union.scm +++ b/tests/union.scm @@ -114,7 +114,17 @@ (file-exists? "bin/ld") (file-exists? "lib/libc.so") (directory-exists? "lib/gcc") - (file-exists? "include/unistd.h")))))) + (file-exists? "include/unistd.h") + + ;; The 'include' sub-directory is only found in + ;; glibc-bootstrap, so it should be unified in a + ;; straightforward way, without traversing it. + (eq? 'symlink (stat:type (lstat "include"))) + + ;; Conversely, several inputs have a 'bin' sub-directory, so + ;; unifying it requires traversing them all, and creating a + ;; new 'bin' sub-directory in the profile. + (eq? 'directory (stat:type (lstat "bin")))))))) (test-end)