guix/gnu/system/linux-container.scm

368 lines
17 KiB
Scheme

;;; GNU Guix --- Functional package management for GNU
;;; Copyright © 2015 David Thompson <davet@gnu.org>
;;; Copyright © 2016-2017, 2019-2023 Ludovic Courtès <ludo@gnu.org>
;;; Copyright © 2019 Arun Isaac <arunisaac@systemreboot.net>
;;; Copyright © 2020 Efraim Flashner <efraim@flashner.co.il>
;;; Copyright © 2020 Google LLC
;;; Copyright © 2022 Ricardo Wurmus <rekado@elephly.net>
;;; Copyright © 2023 Pierre Langlois <pierre.langlois@gmx.com>
;;; Copyright © 2024 Leo Nikkilä <hello@lnikki.la>
;;;
;;; This file is part of GNU Guix.
;;;
;;; GNU Guix is free software; you can redistribute it and/or modify it
;;; under the terms of the GNU General Public License as published by
;;; the Free Software Foundation; either version 3 of the License, or (at
;;; your option) any later version.
;;;
;;; GNU Guix is distributed in the hope that it will be useful, but
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;;; GNU General Public License for more details.
;;;
;;; You should have received a copy of the GNU General Public License
;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
(define-module (gnu system linux-container)
#:use-module (ice-9 match)
#:use-module (srfi srfi-1)
#:use-module (guix config)
#:use-module (guix store)
#:use-module (guix gexp)
#:use-module (guix derivations)
#:use-module (guix monads)
#:use-module (guix modules)
#:use-module (gnu build linux-container)
#:use-module (gnu services)
#:use-module (gnu services base)
#:use-module (gnu services networking)
#:use-module (gnu services shepherd)
#:use-module (gnu system)
#:use-module (gnu system file-systems)
#:export (system-container
containerized-operating-system
container-script
eval/container))
(define* (container-essential-services os #:key shared-network?)
  "Return the essential services of OS, a non-containerized operating
system, adapted for use in a container: the bare-metal, firmware and system
services are removed, and a system service that only provides the locale
directory of OS is put in front of the result.  When SHARED-NETWORK? is true,
the hosts service is removed too and the network configuration files are
filtered out of the etc service."
  ;; Kinds of essential services that must not be carried over as is.
  (define unwanted-kinds
    `(,(service-kind %linux-bare-metal-service)
      ,firmware-service-type
      ,system-service-type
      ,@(if shared-network?
            (list hosts-service-type)
            '())))

  (define (unwanted? svc)
    (memq (service-kind svc) unwanted-kinds))

  (define retained
    (remove unwanted? (operating-system-essential-services os)))

  (define (network-configuration-entry? entry)
    ;; Return true if ENTRY, a two-element list whose first element is a
    ;; file name, names one of %NETWORK-CONFIGURATION-FILES.
    (match entry
      ((name _)
       (member name (map basename %network-configuration-files)))))

  (define locale-system-service
    (service system-service-type
             `(("locale" ,(operating-system-locale-directory os)))))

  (cons locale-system-service
        (if shared-network?
            ;; The network is shared with the host, so drop the network
            ;; configuration files from the etc service.
            (modify-services retained
              (etc-service-type
               files => (remove network-configuration-entry? files)))
            retained)))
(define dummy-networking-service-type
  ;; Service type whose single shepherd service provisions 'loopback' and
  ;; 'networking' but whose 'start' action does nothing beyond returning #t.
  (let ((no-op-service
         (shepherd-service
          (provision '(loopback networking))
          (documentation "Provide loopback and networking without actually
doing anything.")
          (start #~(const #t)))))
    (shepherd-service-type
     'dummy-networking
     ;; The service value is ignored: the same shepherd service is always
     ;; returned.
     (lambda _ no-op-service)
     #f
     (description "Provide loopback and networking without actually doing
anything. This service is used by guest systems running in containers, where
networking support is provided by the host."))))
(define %nscd-container-caches
  ;; Same caches as %nscd-default-caches, each capped at a smaller maximum
  ;; database size so that many containers can coexist on one machine
  ;; without exhausting RAM.
  (let ((size-limit (* 256 1024)))                ;256KiB
    (map (lambda (default-cache)
           (nscd-cache
            (inherit default-cache)
            (max-database-size size-limit)))
         %nscd-default-caches)))
(define* (containerized-operating-system os mappings
                                         #:key
                                         shared-network?
                                         (extra-file-systems '()))
  "Derive from OS an operating system suited to run in a Linux container.
Each <file-system-mapping> in MAPPINGS becomes a bind mount of the resulting
system, and the file systems listed in EXTRA-FILE-SYSTEMS are added to it.
When SHARED-NETWORK? is true, networking services are replaced by a dummy
service and %NETWORK-FILE-MAPPINGS are added to MAPPINGS."
  (define (unusable-in-container? fs)
    ;; True for file systems of OS that are not carried over: the store and
    ;; the root, anything whose device is a string under /dev/, and anything
    ;; mounted under /dev/ or /sys/.
    (let ((where (file-system-mount-point fs))
          (what (file-system-device fs)))
      (cond ((string=? where (%store-prefix)) #t)
            ((string=? where "/") #t)
            ((and (string? what)
                  (string-prefix? "/dev/" what))
             #t)
            ((string-prefix? "/dev/" where) #t)
            (else (string-prefix? "/sys/" where)))))

  (define kept-file-systems
    (remove unusable-in-container? (operating-system-file-systems os)))

  (define (mapping->boot-mount mapping)
    ;; Turn MAPPING into a bind mount flagged as needed for boot.
    (file-system
      (inherit (file-system-mapping->bind-mount mapping))
      (needed-for-boot? #t)))

  ;; Service types removed from the user services of OS.  Some make no sense
  ;; in a container (e.g., those that access /dev/tty[0-9]); the networking
  ;; ones are dropped only when the network is shared, in which case
  ;; DUMMY-NETWORKING-SERVICE-TYPE stands in for them.
  (define dropped-kinds
    `(,console-font-service-type
      ,mingetty-service-type
      ,agetty-service-type
      ,@(if shared-network?
            (list static-networking-service-type
                  dhcp-client-service-type
                  network-manager-service-type
                  connman-service-type)
            '())))

  ;; Many Guix services depend on a 'networking' shepherd service.  When the
  ;; host already set up networking, provide a dummy one so that it is not
  ;; set up a second time.
  (define added-services
    (if shared-network?
        (list (service dummy-networking-service-type))
        '()))

  (define (adapt-service svc)
    ;; Return #f if SVC must be dropped, a reconfigured copy if SVC needs
    ;; container-specific settings, and SVC itself otherwise.
    (let ((kind (service-kind svc)))
      (cond ((memq kind dropped-kinds)
             #f)
            ((eq? kind nscd-service-type)
             ;; Use the smaller container caches.
             (service nscd-service-type
                      (nscd-configuration
                       (inherit (service-value svc))
                       (caches %nscd-container-caches))))
            ((eq? kind guix-service-type)
             ;; Pass '--disable-chroot' so that guix-daemon can build things
             ;; even in Docker without '--privileged'.
             (let ((daemon-config (service-value svc)))
               (service guix-service-type
                        (guix-configuration
                         (inherit daemon-config)
                         (extra-options
                          (cons "--disable-chroot"
                                (guix-configuration-extra-options
                                 daemon-config)))))))
            (else svc))))

  (define base-os
    (operating-system
      (inherit os)
      (swap-devices '())                          ;disable swap
      (services (append added-services
                        (filter-map adapt-service
                                    (operating-system-user-services os))))
      (file-systems (append (map mapping->boot-mount
                                 (if shared-network?
                                     (append %network-file-mappings mappings)
                                     mappings))
                            extra-file-systems
                            kept-file-systems
                            ;; A dummy root file system is needed to be able
                            ;; to create a 'boot-parameters' file.
                            (list (file-system
                                    (mount-point "/")
                                    (device "nothing")
                                    (type "dummy")))))))

  ;; 'essential-services' is thunked, so it is set in a second step, computed
  ;; from the operating system built above.
  (operating-system
    (inherit base-os)
    (essential-services (container-essential-services
                         base-os
                         #:shared-network? shared-network?))))
(define* (container-script os #:key (mappings '()) shared-network?)
  "Return a derivation of a script that runs OS as a Linux container.
MAPPINGS is a list of <file-system-mapping> objects that specify the
files/directories that will be shared with the host system.  When
SHARED-NETWORK? is true, the 'net' namespace is left out of the namespaces
the container is created with."
  (define (mountable-file-system? file-system)
    ;; Return #t if FILE-SYSTEM should be mounted in the container.
    (and (not (string=? "/" (file-system-mount-point file-system)))
         (file-system-needed-for-boot? file-system)))
  ;; Return the specs, as produced by 'file-system->spec', of the file
  ;; systems of OS that satisfy 'mountable-file-system?'.
  (define (os-file-system-specs os)
    (map file-system->spec
         (filter mountable-file-system?
                 (operating-system-file-systems os))))
  ;; From here on OS is the containerized system: the store mapping is
  ;; prepended to MAPPINGS and %CONTAINER-FILE-SYSTEMS are passed as extra
  ;; file systems.
  (let* ((os (containerized-operating-system
              os (cons %store-mapping mappings)
              #:shared-network? shared-network?
              #:extra-file-systems %container-file-systems))
         (specs (os-file-system-specs os)))
    ;; The code below is staged: it runs when the generated script is
    ;; executed, not when this procedure is called.
    (define script
      (with-imported-modules (source-module-closure
                              '((guix build utils)
                                (gnu build linux-container)
                                (guix i18n)
                                (guix diagnostics)))
        #~(begin
            (use-modules (gnu build linux-container)
                         (gnu system file-systems) ;spec->file-system
                         (guix build utils)
                         (guix i18n)
                         (guix diagnostics)
                         (srfi srfi-1)
                         (srfi srfi-37)
                         (ice-9 match))
            ;; Print the usage message of the script.
            (define (show-help)
              (display (G_ "Usage: run-container [OPTION ...]
Run the container with the given options."))
              (newline)
              (display (G_ "
--share=SPEC share host file system with read/write access
according to SPEC"))
              (display (G_ "
--expose=SPEC expose host file system directory as read-only
according to SPEC"))
              (newline)
              (display (G_ "
-h, --help display this help and exit"))
              (newline))
            (define %options
              ;; Specifications of the command-line options.
              ;; '--share' and '--expose' both record a 'file-system-mapping
              ;; entry; they differ in the boolean passed to
              ;; 'specification->file-system-mapping' (presumably the
              ;; writable flag, going by the help text -- TODO confirm).
              (list (option '(#\h "help") #f #f
                            (lambda args
                              (show-help)
                              (exit 0)))
                    (option '("share") #t #f
                            (lambda (opt name arg result)
                              (alist-cons 'file-system-mapping
                                          (specification->file-system-mapping arg #t)
                                          result)))
                    (option '("expose") #t #f
                            (lambda (opt name arg result)
                              (alist-cons 'file-system-mapping
                                          (specification->file-system-mapping arg #f)
                                          result)))))
            ;; Fold ARGS over OPTIONS starting from the empty list.  An
            ;; unrecognized option is reported and the script exits with
            ;; status 1; operands are consed onto the result.
            (define (parse-options args options)
              (args-fold args options
                         (lambda (opt name arg . rest)
                           (report-error (G_ "~A: unrecognized option~%") name)
                           (exit 1))
                         (lambda (op res) (cons op res))
                         '()))
            ;; Tell the user, given PID, how to get a shell in the container.
            (define (explain pid)
              ;; XXX: We can't quite call 'bindtextdomain' so there's actually
              ;; no i18n.
              ;; XXX: Should we really give both options? 'guix container exec'
              ;; is a more verbose command. Hard to fail to enter the container
              ;; when we list two options.
              (info (G_ "system container is running as PID ~a~%") pid)
              (info (G_ "Run 'sudo guix container exec ~a /run/current-system/profile/bin/bash --login'\n")
                    pid)
              (info (G_ "or run 'sudo nsenter -a -t ~a' to get a shell into it.~%") pid)
              (newline (guix-warning-port)))
            (let* ((opts (parse-options (cdr (command-line)) %options))
                   ;; Keep only the mappings given with '--share' and
                   ;; '--expose'; any other entry of OPTS is ignored.
                   (mappings (filter-map (match-lambda
                                           (('file-system-mapping . mapping) mapping)
                                           (_ #f))
                                         opts))
                   ;; Command-line mappings come first, followed by the file
                   ;; systems of the containerized OS.  A bind mount whose
                   ;; device does not exist is dropped.
                   (file-systems
                    (filter-map (lambda (fs)
                                  (let ((flags (file-system-flags fs)))
                                    (and (or (not (memq 'bind-mount flags))
                                             (file-exists? (file-system-device fs)))
                                         fs)))
                                (append (map file-system-mapping->bind-mount mappings)
                                        (map spec->file-system '#$specs)))))
              (call-with-container file-systems
                ;; This thunk sets up the environment and a few directories,
                ;; then loads the 'boot' file of the system.
                (lambda ()
                  (setenv "HOME" "/root")
                  (setenv "TMPDIR" "/tmp")
                  (setenv "GUIX_NEW_SYSTEM" #$os)
                  (for-each mkdir-p '("/run" "/bin" "/etc" "/home" "/var"))
                  (primitive-load (string-append #$os "/boot")))
                ;; A range of 65536 uid/gids is used to cover 16 bits worth of
                ;; users and groups, which is sufficient for most cases.
                ;;
                ;; See: http://www.freedesktop.org/software/systemd/man/systemd-nspawn.html#--private-users=
                #:host-uids 65536
                ;; With a shared network, 'net' is removed from the namespaces.
                #:namespaces (if #$shared-network?
                                 (delq 'net %namespaces)
                                 %namespaces)
                #:process-spawned-hook explain)))))
    (gexp->script "run-container" script)))
(define* (eval/container exp
                         #:key
                         (mappings '())
                         (namespaces %namespaces)
                         (guest-uid 0) (guest-gid 0))
  "Run EXP, a gexp, in a new process that executes in the separate namespaces
listed in NAMESPACES.  The closure of EXP's inputs is made visible in that
process's mount namespace, along with MAPPINGS, a list of
<file-system-mapping>.  The code runs as GUEST-UID and GUEST-GID inside the
namespaces.  Return the process' exit status as a monadic value.

This is meant for processes that, unlike derivations, are not entirely pure:
they need to access the outside world or to perform side effects."
  (define (store-item->mapping item)
    ;; Map ITEM to the same file name inside the container.
    (file-system-mapping
     (source item)
     (target item)))

  (define (search-path-flags flag directories)
    ;; Return a list where each element of DIRECTORIES is preceded by FLAG.
    (append-map (lambda (directory)
                  (list flag directory))
                directories))

  (mlet %store-monad ((lowered (lower-gexp exp)))
    (define guile-input
      (lowered-gexp-guile lowered))

    (define inputs
      (cons guile-input (lowered-gexp-inputs lowered)))

    (define items
      (append (append-map derivation-input-output-paths inputs)
              (lowered-gexp-sources lowered)))

    (define (run-guile)
      ;; Replace the current process with Guile evaluating the lowered
      ;; expression, with its load paths passed via '-L' and '-C'.
      (apply execl
             (string-append (derivation-input-output-path guile-input)
                            "/bin/guile")
             "guile"
             (append (search-path-flags "-L"
                                        (lowered-gexp-load-path lowered))
                     (search-path-flags "-C"
                                        (lowered-gexp-load-compiled-path
                                         lowered))
                     (list "-c"
                           (object->string
                            (lowered-gexp-sexp lowered))))))

    (mbegin %store-monad
      ;; Everything EXP depends on must be built before its closure is
      ;; computed and bind-mounted.
      (built-derivations inputs)
      (mlet %store-monad ((closure ((store-lift requisites) items)))
        (return (call-with-container
                    (map file-system-mapping->bind-mount
                         (append (map store-item->mapping closure)
                                 mappings))
                  run-guile
                  #:namespaces namespaces
                  #:guest-uid guest-uid
                  #:guest-gid guest-gid))))))