* Fix a race condition with parallel builds where multiple

fixed-output derivations or substitutions try to build the same
  store path at the same time.  Locking generally catches this, but
  not between multiple goals in the same process.  This happened
  especially often (actually, only) in the build farm with fetchurl
  downloads of the same file being executed on multiple machines and
  then copied back to the main machine where they would clobber each
  other (NIXBF-13).

  Solution: if a goal notices that the output path is already locked,
  then go to sleep until another goal finishes (hopefully the one
  locking the path) and try again.
This commit is contained in:
Eelco Dolstra 2007-08-28 11:36:17 +00:00
parent bc0429b1cd
commit c970b28ba0
3 changed files with 96 additions and 10 deletions

View File

@ -181,6 +181,9 @@ private:
WeakGoalMap derivationGoals;
WeakGoalMap substitutionGoals;
/* Goals waiting for busy paths to be unlocked. */
WeakGoals waitingForAnyGoal;
public:
Worker();
@ -223,6 +226,10 @@ public:
substituter invocations to reduce overhead. */
void waitForInfo(GoalPtr goal, Path sub, Path storePath);
/* Wait for any goal to finish. Pretty indiscriminate way to
wait for some resource that some other goal is holding. */
void waitForAnyGoal(GoalPtr goal);
/* Loop until the specified top-level goals have finished. */
void run(const Goals & topGoals);
@ -642,7 +649,7 @@ private:
void buildDone();
/* Is the build hook willing to perform the build? */
typedef enum {rpAccept, rpDecline, rpPostpone, rpDone} HookReply;
typedef enum {rpAccept, rpDecline, rpPostpone, rpDone, rpRestart} HookReply;
HookReply tryBuildHook();
/* Synchronously wait for a build hook to finish. */
@ -651,9 +658,13 @@ private:
/* Acquires locks on the output paths and gathers information
about the build (e.g., the input closures). During this
process its possible that we find out that the build is
unnecessary, in which case we return false (this is not an
error condition!). */
bool prepareBuild();
unnecessary, in which case we return prDone. It's also
possible that some other goal is already building/substituting
the output paths, in which case we return prRestart (go back to
the haveDerivation() state). Otherwise, prProceed is
returned. */
typedef enum {prProceed, prDone, prRestart} PrepareBuildReply;
PrepareBuildReply prepareBuild();
/* Start building a derivation. */
void startBuilder();
@ -791,6 +802,20 @@ void DerivationGoal::haveDerivation()
return;
}
/* If this is a fixed-output derivation, it is possible that some
other goal is already building the output paths. (The case
where some other process is building it is handled through
normal locking mechanisms.) So if any output paths are already
being built, put this goal to sleep. */
for (PathSet::iterator i = invalidOutputs.begin();
i != invalidOutputs.end(); ++i)
if (pathIsLockedByMe(*i)) {
/* Wait until any goal finishes (hopefully the one that is
locking *i), then retry haveDerivation(). */
worker.waitForAnyGoal(shared_from_this());
return;
}
/* We are first going to try to create the invalid output paths
through substitutes. If that doesn't work, we'll build
them. */
@ -884,6 +909,12 @@ void DerivationGoal::tryToBuild()
/* Somebody else did it. */
amDone(ecSuccess);
return;
case rpRestart:
/* Somebody else is building this output path.
Restart from haveDerivation(). */
state = &DerivationGoal::haveDerivation;
worker.waitForAnyGoal(shared_from_this());
return;
}
/* Make sure that we are allowed to start a build. */
@ -894,9 +925,14 @@ void DerivationGoal::tryToBuild()
/* Acquire locks and such. If we then see that the build has
been done by somebody else, we're done. */
if (!prepareBuild()) {
PrepareBuildReply preply = prepareBuild();
if (preply == prDone) {
amDone(ecSuccess);
return;
} else if (preply == prRestart) {
state = &DerivationGoal::haveDerivation;
worker.waitForAnyGoal(shared_from_this());
return;
}
/* Okay, we have to build. */
@ -1176,11 +1212,12 @@ DerivationGoal::HookReply DerivationGoal::tryBuildHook()
/* Acquire locks and such. If we then see that the output
paths are now valid, we're done. */
if (!prepareBuild()) {
PrepareBuildReply preply = prepareBuild();
if (preply == prDone || preply == prRestart) {
/* Tell the hook to exit. */
writeLine(toHook.writeSide, "cancel");
terminateBuildHook();
return rpDone;
return preply == prDone ? rpDone : rpRestart;
}
printMsg(lvlInfo, format("running hook to build path(s) %1%")
@ -1256,8 +1293,20 @@ void DerivationGoal::terminateBuildHook(bool kill)
}
bool DerivationGoal::prepareBuild()
DerivationGoal::PrepareBuildReply DerivationGoal::prepareBuild()
{
/* Check for the possibility that some other goal in this process
has locked the output since we checked in haveDerivation().
(It can't happen between here and the lockPaths() call below
because we're not allowing multi-threading.) */
for (DerivationOutputs::iterator i = drv.outputs.begin();
i != drv.outputs.end(); ++i)
if (pathIsLockedByMe(i->second.path)) {
debug(format("restarting derivation `%1%' because `%2%' is locked by another goal")
% drvPath % i->second.path);
return prRestart;
}
/* Obtain locks on all output paths. The locks are automatically
released when we exit this function or Nix crashes. */
/* !!! BUG: this could block, which is not allowed. */
@ -1276,7 +1325,7 @@ bool DerivationGoal::prepareBuild()
debug(format("skipping build of derivation `%1%', someone beat us to it")
% drvPath);
outputLocks.setDeletion(true);
return false;
return prDone;
}
if (validPaths.size() > 0) {
@ -1341,7 +1390,7 @@ bool DerivationGoal::prepareBuild()
allPaths.insert(inputPaths.begin(), inputPaths.end());
return true;
return prProceed;
}
@ -2028,6 +2077,16 @@ void SubstitutionGoal::tryToRun()
return;
}
/* Maybe a derivation goal has already locked this path
(exceedingly unlikely, since it should have used a substitute
first, but let's be defensive). */
if (pathIsLockedByMe(storePath)) {
debug(format("restarting substitution of `%1%' because it's locked by another goal")
% storePath);
worker.waitForAnyGoal(shared_from_this());
return; /* restart in the tryToRun() state when another goal finishes */
}
/* Acquire a lock on the output path. */
outputLock = boost::shared_ptr<PathLocks>(new PathLocks);
outputLock->lockPaths(singleton<PathSet>(storePath),
@ -2251,6 +2310,16 @@ void Worker::removeGoal(GoalPtr goal)
if (goal->getExitCode() == Goal::ecFailed && !keepGoing)
topGoals.clear();
}
/* Wake up goals waiting for any goal to finish. */
for (WeakGoals::iterator i = waitingForAnyGoal.begin();
i != waitingForAnyGoal.end(); ++i)
{
GoalPtr goal = i->lock();
if (goal) wakeUp(goal);
}
waitingForAnyGoal.clear();
}
@ -2337,6 +2406,13 @@ void Worker::waitForInfo(GoalPtr goal, Path sub, Path storePath)
}
void Worker::waitForAnyGoal(GoalPtr goal)
{
debug("wait for any goal");
waitingForAnyGoal.insert(goal);
}
void Worker::getInfo()
{
for (map<Path, PathSet>::iterator i = requestedInfo.begin();

View File

@ -224,4 +224,11 @@ void PathLocks::setDeletion(bool deletePaths)
}
bool pathIsLockedByMe(const Path & path)
{
Path lockPath = path + ".lock";
return lockedPaths.find(lockPath) != lockedPaths.end();
}
}

View File

@ -40,6 +40,9 @@ public:
};
bool pathIsLockedByMe(const Path & path);
}