Handle gc-keep-outputs and gc-keep-derivations both enabled

If the options gc-keep-outputs and gc-keep-derivations are both
enabled, you can get a cycle in the liveness graph.  There was a hack
to handle this, but it didn't work with multiple-output derivations,
causing the garbage collector to fail with errors like ‘error: cannot
delete path `...' because it is in use by `...'’.  The garbage
collector now handles strongly connected components in the liveness
graph as a unit and decides whether to delete all or none of the paths
in an SCC.
This commit is contained in:
Eelco Dolstra 2012-09-12 18:49:35 -04:00
parent 479e9172b3
commit 4fca02077c
2 changed files with 101 additions and 96 deletions

View File

@ -448,67 +448,43 @@ bool LocalStore::tryToDelete(GCState & state, const Path & path)
startNest(nest, lvlDebug, format("considering whether to delete `%1%'") % path); startNest(nest, lvlDebug, format("considering whether to delete `%1%'") % path);
if (state.roots.find(path) != state.roots.end()) { /* If gc-keep-outputs and gc-keep-derivations are both set, we can
printMsg(lvlDebug, format("cannot delete `%1%' because it's a root") % path); have cycles in the liveness graph, so we need to treat such
goto isLive; strongly connected components as a single unit (paths). That
} is, we can delete the elements of paths only if all referrers
of paths are garbage. */
PathSet paths, referrers;
if (isValidPath(path)) { if (isValidPath(path)) {
/* Recursively try to delete the referrers of this path. If /* Add derivers and outputs of path to paths. */
any referrer can't be deleted, then this path can't be PathSet todo;
deleted either. */ todo.insert(path);
PathSet referrers; while (!todo.empty()) {
queryReferrers(path, referrers); Path p = *todo.begin();
foreach (PathSet::iterator, i, referrers) assertStorePath(p);
if (*i != path && !tryToDelete(state, *i)) { todo.erase(p);
printMsg(lvlDebug, format("cannot delete `%1%' because it has live referrers") % path); if (paths.find(p) != paths.end()) continue;
goto isLive; paths.insert(p);
/* If gc-keep-derivations is set and this is a derivation,
then don't delete the derivation if any of the outputs
are live. */
if (state.gcKeepDerivations && isDerivation(p)) {
PathSet outputs = queryDerivationOutputs(p);
foreach (PathSet::iterator, i, outputs)
if (isValidPath(*i)) todo.insert(*i);
} }
/* If gc-keep-outputs is set, then don't delete this path
/* If gc-keep-derivations is set and this is a derivation, if there are derivers of this path that are not
then don't delete the derivation if any of the outputs are garbage. */
live. */ if (state.gcKeepOutputs) {
if (state.gcKeepDerivations && isDerivation(path)) { PathSet derivers = queryValidDerivers(p);
PathSet outputs = queryDerivationOutputs(path); foreach (PathSet::iterator, i, derivers) todo.insert(*i);
foreach (PathSet::iterator, i, outputs)
if (!tryToDelete(state, *i)) {
printMsg(lvlDebug, format("cannot delete derivation `%1%' because its output `%2%' is alive") % path % *i);
goto isLive;
}
}
/* If gc-keep-derivations and gc-keep-outputs are both set,
it's possible that the path has already been deleted (due
to the recursion below), so bail out. */
if (!pathExists(path)) return true;
/* If gc-keep-outputs is set, then don't delete this path if
there are derivers of this path that are not garbage. */
if (state.gcKeepOutputs) {
PathSet derivers = queryValidDerivers(path);
foreach (PathSet::iterator, deriver, derivers) {
/* Break an infinite recursion if gc-keep-derivations
and gc-keep-outputs are both set by tentatively
assuming that this path is garbage. This is a safe
assumption because at this point, the only thing
that can prevent it from being garbage is the
deriver. Since tryToDelete() works "upwards"
through the dependency graph, it won't encouter
this path except in the call to tryToDelete() in
the gc-keep-derivation branch. */
state.deleted.insert(path);
if (!tryToDelete(state, *deriver)) {
state.deleted.erase(path);
printMsg(lvlDebug, format("cannot delete `%1%' because its deriver `%2%' is alive") % path % *deriver);
goto isLive;
}
} }
} }
} }
else { else {
/* A lock file belonging to a path that we're building right /* A lock file belonging to a path that we're building right
now isn't garbage. */ now isn't garbage. */
if (isActiveTempFile(state, path, ".lock")) return false; if (isActiveTempFile(state, path, ".lock")) return false;
@ -517,55 +493,80 @@ bool LocalStore::tryToDelete(GCState & state, const Path & path)
currently being built. */ currently being built. */
if (isActiveTempFile(state, path, ".chroot")) return false; if (isActiveTempFile(state, path, ".chroot")) return false;
paths.insert(path);
} }
/* The path is garbage, so delete it. */ /* Check if any path in paths is a root. */
if (shouldDelete(state.options.action)) { foreach (PathSet::iterator, i, paths)
if (state.roots.find(*i) != state.roots.end()) {
/* If it's a valid path that's not a regular file or symlink, printMsg(lvlDebug, format("cannot delete `%1%' because it's a root") % *i);
invalidate it, rename it, and schedule it for deletion. goto isLive;
The renaming is to ensure that later (when we're not
holding the global GC lock) we can delete the path without
being afraid that the path has become alive again.
Otherwise delete it right away. */
if (isValidPath(path)) {
if (S_ISDIR(st.st_mode)) {
printMsg(lvlInfo, format("invalidating `%1%'") % path);
// Estimate the amount freed using the narSize field.
state.bytesInvalidated += queryPathInfo(path).narSize;
invalidatePathChecked(path);
makeMutable(path.c_str());
// Mac OS X cannot rename directories if they are read-only.
if (chmod(path.c_str(), st.st_mode | S_IWUSR) == -1)
throw SysError(format("making `%1%' writable") % path);
Path tmp = (format("%1%-gc-%2%") % path % getpid()).str();
if (rename(path.c_str(), tmp.c_str()))
throw SysError(format("unable to rename `%1%' to `%2%'") % path % tmp);
state.invalidated.insert(tmp);
} else {
invalidatePathChecked(path);
deleteGarbage(state, path);
}
} else
deleteGarbage(state, path);
if (state.results.bytesFreed + state.bytesInvalidated > state.options.maxFreed) {
printMsg(lvlInfo, format("deleted or invalidated more than %1% bytes; stopping") % state.options.maxFreed);
throw GCLimitReached();
} }
} else /* Recursively try to delete the referrers of this strongly
printMsg(lvlTalkative, format("would delete `%1%'") % path); connected component. If any referrer can't be deleted, then
these paths can't be deleted either. */
foreach (PathSet::iterator, i, paths)
if (isValidPath(*i)) queryReferrers(*i, referrers);
foreach (PathSet::iterator, i, referrers)
if (paths.find(*i) == paths.end() && !tryToDelete(state, *i)) {
printMsg(lvlDebug, format("cannot delete `%1%' because it has live referrers") % *i);
goto isLive;
}
/* The paths are garbage, so delete them. */
foreach (PathSet::iterator, i, paths) {
if (shouldDelete(state.options.action)) {
/* If it's a valid path that's not a regular file or
symlink, invalidate it, rename it, and schedule it for
deletion. The renaming is to ensure that later (when
we're not holding the global GC lock) we can delete the
path without being afraid that the path has become
alive again. Otherwise delete it right away. */
if (isValidPath(*i)) {
if (S_ISDIR(st.st_mode)) {
printMsg(lvlInfo, format("invalidating `%1%'") % *i);
// Estimate the amount freed using the narSize field.
state.bytesInvalidated += queryPathInfo(*i).narSize;
invalidatePathChecked(*i);
makeMutable(i->c_str());
// Mac OS X cannot rename directories if they are read-only.
if (chmod(i->c_str(), st.st_mode | S_IWUSR) == -1)
throw SysError(format("making `%1%' writable") % *i);
Path tmp = (format("%1%-gc-%2%") % *i % getpid()).str();
if (rename(i->c_str(), tmp.c_str()))
throw SysError(format("unable to rename `%1%' to `%2%'") % *i % tmp);
state.invalidated.insert(tmp);
} else {
invalidatePathChecked(*i);
deleteGarbage(state, *i);
}
} else
deleteGarbage(state, *i);
if (state.results.bytesFreed + state.bytesInvalidated > state.options.maxFreed) {
printMsg(lvlInfo, format("deleted or invalidated more than %1% bytes; stopping") % state.options.maxFreed);
throw GCLimitReached();
}
} else
printMsg(lvlTalkative, format("would delete `%1%'") % *i);
state.deleted.insert(*i);
if (state.options.action != GCOptions::gcReturnLive)
state.results.paths.insert(*i);
}
state.deleted.insert(path);
if (state.options.action != GCOptions::gcReturnLive)
state.results.paths.insert(path);
return true; return true;
isLive: isLive:
state.live.insert(path); foreach (PathSet::iterator, i, paths) {
if (state.options.action == GCOptions::gcReturnLive) state.live.insert(*i);
state.results.paths.insert(path); if (state.options.action == GCOptions::gcReturnLive)
state.results.paths.insert(*i);
}
return false; return false;
} }
@ -733,8 +734,10 @@ void LocalStore::collectGarbage(const GCOptions & options, GCResults & results)
deleteGarbage(state, *i); deleteGarbage(state, *i);
/* Clean up the links directory. */ /* Clean up the links directory. */
printMsg(lvlError, format("deleting unused links...")); if (options.action == GCOptions::gcDeleteDead || options.action == GCOptions::gcDeleteSpecific) {
removeUnusedLinks(state); printMsg(lvlError, format("deleting unused links..."));
removeUnusedLinks(state);
}
} }

View File

@ -55,4 +55,6 @@ if nix-build multiple-outputs.nix -A cyclic --no-out-link; then
fi fi
echo "collecting garbage..." echo "collecting garbage..."
nix-store --gc rm $TEST_ROOT/result*
nix-store --gc --option gc-keep-derivations true --option gc-keep-outputs true
nix-store --gc --print-roots