From c505702265833a762d681952bcc72562d64a242e Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Thu, 27 Jan 2005 15:21:29 +0000 Subject: [PATCH] * Fix and simplify the garbage collector (it's still not concurrent, though). In particular it's now much easier to register a GC root. Just place a symlink to whatever store path it is that you want to keep in /nix/var/nix/gcroots. --- scripts/nix-build.in | 2 +- scripts/nix-collect-garbage.in | 53 ++++++++++++------------------- src/libstore/build.cc | 4 +-- src/libstore/gc.cc | 58 +++++++++++++++++++++++++++++++++- src/libstore/gc.hh | 27 ++++++---------- src/libstore/store.cc | 11 +++++-- src/libstore/store.hh | 2 +- src/nix-env/main.cc | 2 +- src/nix-env/profiles.cc | 19 +++++------ src/nix-store/main.cc | 55 ++++++++------------------------ 10 files changed, 124 insertions(+), 109 deletions(-) diff --git a/scripts/nix-build.in b/scripts/nix-build.in index a5d747686a..5ae591f351 100644 --- a/scripts/nix-build.in +++ b/scripts/nix-build.in @@ -23,7 +23,7 @@ for i in "$@"; do for j in $storeExprs; do echo "store expression is $j" >&2 done - outPaths=$(@bindir@/nix-store -qnfv $extraArgs $storeExprs) + outPaths=$(@bindir@/nix-store -rv $extraArgs $storeExprs) for j in $outPaths; do echo "$j" if test -z "$noLink"; then diff --git a/scripts/nix-collect-garbage.in b/scripts/nix-collect-garbage.in index 44bcc16bbc..f11ed2cb69 100644 --- a/scripts/nix-collect-garbage.in +++ b/scripts/nix-collect-garbage.in @@ -9,7 +9,6 @@ my $storeDir = "@storedir@"; my %alive; my $gcOper = "--delete"; -my $minAge = 0; my @roots = (); @@ -20,33 +19,11 @@ for (my $i = 0; $i < scalar @ARGV; $i++) { if ($arg eq "--delete" || $arg eq "--print-live" || $arg eq "--print-dead") { $gcOper = $arg; } - elsif ($arg eq "--min-age") { - $i++; - $minAge = undef; - $minAge = $ARGV[$i]; - die "invalid minimum age" unless defined $minAge && $minAge =~ /^\d*$/; - } else { die "unknown argument `$arg'" }; } -# Read all GC roots from the given file. 
-sub readRoots { - my $fileName = shift; - open ROOT, "<$fileName" or die "cannot open `$fileName': $!"; - while () { - chomp; - foreach my $root (split ' ') { - die "bad root `$root' in file `$fileName'" - unless $root =~ /^\S+$/; - push @roots, $root; - } - } - close ROOT; -} - - -# Recursively finds all *.gcroot files in the given directory. +# Recursively finds all symlinks to the store in the given directory. sub findRoots; sub findRoots { my $followSymlinks = shift; @@ -58,14 +35,26 @@ sub findRoots { foreach my $name (@names) { next if $name eq "." || $name eq ".."; - $name = $dir . "/" . $name; - if ($name =~ /.gcroot$/ && -f $name) { - readRoots $name; - } - elsif (-d $name) { - if ($followSymlinks || !-l $name) { - findRoots 0, $name; + my $path = $dir . "/" . $name; + + if (-l $path) { + my $target = readlink $path + or die "cannot read symlink `$path': $!"; + + if (substr($target, 0, length $storeDir) eq $storeDir) { + # We're only interested in the store-level part. + $target = substr($target, length $storeDir); + $target = "$storeDir/$target"; + push @roots, $target; } + + elsif ($followSymlinks && -d $path) { + findRoots 0, $path; + } + } + + elsif (-d $path) { + findRoots $followSymlinks, $path; } } @@ -77,7 +66,7 @@ findRoots 1, $rootsDir; # Run the collector with the roots we found. -my $pid = open2(">&1", \*WRITE, "@bindir@/nix-store --gc $gcOper --min-age $minAge") +my $pid = open2(">&1", \*WRITE, "@bindir@/nix-store --gc $gcOper") or die "cannot run `nix-store --gc'"; foreach my $root (@roots) { diff --git a/src/libstore/build.cc b/src/libstore/build.cc index b63488b8de..52bd08bb11 100644 --- a/src/libstore/build.cc +++ b/src/libstore/build.cc @@ -458,7 +458,7 @@ void DerivationGoal::haveStoreExpr() i != invalidOutputs.end(); ++i) /* Don't bother creating a substitution goal if there are no substitutes. 
*/ - if (querySubstitutes(*i).size() > 0) + if (querySubstitutes(noTxn, *i).size() > 0) addWaitee(worker.makeSubstitutionGoal(*i)); if (waitees.empty()) /* to prevent hang (no wake-up event) */ @@ -1315,7 +1315,7 @@ void SubstitutionGoal::init() } /* Read the substitutes. */ - subs = querySubstitutes(storePath); + subs = querySubstitutes(noTxn, storePath); /* To maintain the closure invairant, we first have to realise the paths referenced by this one. */ diff --git a/src/libstore/gc.cc b/src/libstore/gc.cc index 4f33065054..ba6e6bb9d4 100644 --- a/src/libstore/gc.cc +++ b/src/libstore/gc.cc @@ -1,12 +1,68 @@ #include "globals.hh" #include "gc.hh" - +#include "build.hh" #include #include #include +void collectGarbage(const PathSet & roots, GCAction action, + PathSet & result) +{ + result.clear(); + + /* !!! TODO: Acquire an exclusive lock on the gcroots directory. + This prevents the set of live paths from increasing after this + point. */ + + /* Determine the live paths which is just the closure of the + roots under the `references' relation. */ + PathSet livePaths; + for (PathSet::const_iterator i = roots.begin(); i != roots.end(); ++i) + computeFSClosure(canonPath(*i), livePaths); + + if (action == gcReturnLive) { + result = livePaths; + return; + } + + /* !!! TODO: Try to acquire (without blocking) exclusive locks on + the files in the `pending' directory. Delete all files for + which we managed to acquire such a lock (since if we could get + such a lock, that means that the process that owned the file + has died). */ + + /* !!! TODO: Acquire shared locks on all files in the pending + directories. This prevents the set of pending paths from + increasing while we are garbage-collecting. Read the set of + pending paths from those files. */ + + /* Read the Nix store directory to find all currently existing + paths. 
*/ + Strings storeNames = readDirectory(nixStore); + + for (Strings::iterator i = storeNames.begin(); i != storeNames.end(); ++i) { + Path path = canonPath(nixStore + "/" + *i); + + if (livePaths.find(path) != livePaths.end()) { + debug(format("live path `%1%'") % path); + continue; + } + + debug(format("dead path `%1%'") % path); + result.insert(path); + + if (action == gcDeleteDead) { + printMsg(lvlInfo, format("deleting `%1%'") % path); + deleteFromStore(path); + } + + } +} + + + #if 0 void followLivePaths(Path nePath, PathSet & live) { diff --git a/src/libstore/gc.hh b/src/libstore/gc.hh index d1ca5c63e4..2ea851abc0 100644 --- a/src/libstore/gc.hh +++ b/src/libstore/gc.hh @@ -3,24 +3,15 @@ #include "util.hh" +/* Garbage collector operation. */ +typedef enum { gcReturnLive, gcReturnDead, gcDeleteDead } GCAction; -/* Determine the set of "live" store paths, given a set of root store - expressions. The live store paths are those that are reachable - from the roots. The roots are reachable by definition. Any path - mentioned in a reachable store expression is also reachable. If a - derivation store expression is reachable, then its successor (if it - exists) if also reachable. It is not an error for store - expressions not to exist (since this can happen on derivation store - expressions, for instance, due to the substitute mechanism), but - successor links are followed even for non-existant derivations. */ -PathSet findLivePaths(const Paths & roots); - -/* Given a set of "live" store paths, determine the set of "dead" - store paths (which are simply all store paths that are not in the - live set). The value `minAge' specifies the minimum age in seconds - for an unreachable file to be considered dead (0 meaning that any - unreachable file is dead). */ -PathSet findDeadPaths(const PathSet & live, time_t minAge); - +/* If `action' is set to `gcReturnLive', return the set of paths + reachable from (i.e. in the closure of) the specified roots.
If + `action' is `gcReturnDead', return the set of paths not reachable + from the roots. If `action' is `gcDeleteDead', actually delete the + latter set. */ +void collectGarbage(const PathSet & roots, GCAction action, + PathSet & result); #endif /* !__GC_H */ diff --git a/src/libstore/store.cc b/src/libstore/store.cc index f5e7d2aa58..30573992cc 100644 --- a/src/libstore/store.cc +++ b/src/libstore/store.cc @@ -363,9 +363,9 @@ void registerSubstitute(const Transaction & txn, } -Substitutes querySubstitutes(const Path & srcPath) +Substitutes querySubstitutes(const Transaction & txn, const Path & srcPath) { - return readSubstitutes(noTxn, srcPath); + return readSubstitutes(txn, srcPath); } @@ -411,6 +411,13 @@ static void invalidatePath(const Path & path, Transaction & txn) debug(format("unregistering path `%1%'") % path); nixDB.delPair(txn, dbValidPaths, path); + + /* Clear the `references' entry for this path, as well as the + inverse `referers' entries; but only if there are no + substitutes for this path. This maintains the cleanup + invariant. */ + if (querySubstitutes(txn, path).size() == 0) + setReferences(txn, path, PathSet()); } diff --git a/src/libstore/store.hh b/src/libstore/store.hh index 968786305e..dce4eb1d62 100644 --- a/src/libstore/store.hh +++ b/src/libstore/store.hh @@ -45,7 +45,7 @@ void registerSubstitute(const Transaction & txn, const Path & srcPath, const Substitute & sub); /* Return the substitutes for the given path. */ -Substitutes querySubstitutes(const Path & srcPath); +Substitutes querySubstitutes(const Transaction & txn, const Path & srcPath); /* Deregister all substitutes.
*/ void clearSubstitutes(); diff --git a/src/nix-env/main.cc b/src/nix-env/main.cc index 3cb6b02c1f..da77e24288 100644 --- a/src/nix-env/main.cc +++ b/src/nix-env/main.cc @@ -573,7 +573,7 @@ static void opQuery(Globals & globals, Strings columns; if (printStatus) { - Substitutes subs = querySubstitutes(i->drvPath); + Substitutes subs = querySubstitutes(noTxn, i->drvPath); columns.push_back( (string) (installedPaths.find(i->outPath) != installedPaths.end() ? "I" : "-") diff --git a/src/nix-env/profiles.cc b/src/nix-env/profiles.cc index 96467831f1..abfdf9fede 100644 --- a/src/nix-env/profiles.cc +++ b/src/nix-env/profiles.cc @@ -62,11 +62,11 @@ Generations findGenerations(Path profile, int & curGen) static void makeNames(const Path & profile, unsigned int num, - Path & generation, Path & gcrootDrv) + Path & outLink, Path & drvLink) { Path prefix = (format("%1%-%2%") % profile % num).str(); - generation = prefix + "-link"; - gcrootDrv = prefix + "-drv.gcroot"; + outLink = prefix + "-output"; + drvLink = prefix + "-drv"; } @@ -79,20 +79,21 @@ Path createGeneration(Path profile, Path outPath, Path drvPath) unsigned int num = gens.size() > 0 ? gens.front().number : 0; /* Create the new generation. */ - Path generation, gcrootDrv; + Path outLink, drvLink; while (1) { - makeNames(profile, num, generation, gcrootDrv); - if (symlink(outPath.c_str(), generation.c_str()) == 0) break; + makeNames(profile, num, outLink, drvLink); + if (symlink(outPath.c_str(), outLink.c_str()) == 0) break; if (errno != EEXIST) - throw SysError(format("creating symlink `%1%'") % generation); + throw SysError(format("creating symlink `%1%'") % outLink); /* Somebody beat us to it, retry with a higher number. 
*/ num++; } - writeStringToFile(gcrootDrv, drvPath); + if (symlink(drvPath.c_str(), drvLink.c_str()) != 0) + throw SysError(format("creating symlink `%1%'") % drvLink); - return generation; + return outLink; } diff --git a/src/nix-store/main.cc b/src/nix-store/main.cc index ea8d398f1a..810fe94b65 100644 --- a/src/nix-store/main.cc +++ b/src/nix-store/main.cc @@ -274,61 +274,32 @@ static void opIsValid(Strings opFlags, Strings opArgs) static void opGC(Strings opFlags, Strings opArgs) { -#if 0 + GCAction action; + /* Do what? */ - enum { soPrintLive, soPrintDead, soDelete } subOp; - time_t minAge = 0; for (Strings::iterator i = opFlags.begin(); i != opFlags.end(); ++i) - if (*i == "--print-live") subOp = soPrintLive; - else if (*i == "--print-dead") subOp = soPrintDead; - else if (*i == "--delete") subOp = soDelete; - else if (*i == "--min-age") { - int n; - if (opArgs.size() == 0 || !string2Int(opArgs.front(), n)) - throw UsageError("`--min-age' requires an integer argument"); - minAge = n; - } + if (*i == "--print-live") action = gcReturnLive; + else if (*i == "--print-dead") action = gcReturnDead; + else if (*i == "--delete") action = gcDeleteDead; else throw UsageError(format("bad sub-operation `%1%' in GC") % *i); - - Paths roots; + + /* Read the roots. */ + PathSet roots; while (1) { Path root; getline(cin, root); if (cin.eof()) break; - roots.push_back(root); + roots.insert(root); } - PathSet live = findLivePaths(roots); + PathSet result; + collectGarbage(roots, action, result); - if (subOp == soPrintLive) { - for (PathSet::iterator i = live.begin(); i != live.end(); ++i) + if (action != gcDeleteDead) { + for (PathSet::iterator i = result.begin(); i != result.end(); ++i) cout << *i << endl; - return; } - - PathSet dead = findDeadPaths(live, minAge * 3600); - - if (subOp == soPrintDead) { - for (PathSet::iterator i = dead.begin(); i != dead.end(); ++i) - cout << *i << endl; - return; - } - - if (subOp == soDelete) { - - /* !!! 
What happens if the garbage collector run is aborted - halfway through? In particular, dead paths can always - become live again (through re-instantiation), and might - then refer to deleted paths. => check instantiation - invariants */ - - for (PathSet::iterator i = dead.begin(); i != dead.end(); ++i) { - printMsg(lvlInfo, format("deleting `%1%'") % *i); - deleteFromStore(*i); - } - } -#endif }