From c970b28ba0f8866bde800849120d429d781ccb5d Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Tue, 28 Aug 2007 11:36:17 +0000 Subject: [PATCH] * Fix a race condition with parallel builds where multiple fixed-output derivations or substitutions try to build the same store path at the same time. Locking generally catches this, but not between multiple goals in the same process. This happened especially often (actually, only) in the build farm with fetchurl downloads of the same file being executed on multiple machines and then copied back to the main machine where they would clobber each other (NIXBF-13). Solution: if a goal notices that the output path is already locked, then go to sleep until another goal finishes (hopefully the one locking the path) and try again. --- src/libstore/build.cc | 96 +++++++++++++++++++++++++++++++++++---- src/libstore/pathlocks.cc | 7 +++ src/libstore/pathlocks.hh | 3 ++ 3 files changed, 96 insertions(+), 10 deletions(-) diff --git a/src/libstore/build.cc b/src/libstore/build.cc index 4a2affdf59..5ad8105006 100644 --- a/src/libstore/build.cc +++ b/src/libstore/build.cc @@ -181,6 +181,9 @@ private: WeakGoalMap derivationGoals; WeakGoalMap substitutionGoals; + /* Goals waiting for busy paths to be unlocked. */ + WeakGoals waitingForAnyGoal; + public: Worker(); @@ -222,6 +225,10 @@ public: get info about `storePath' (with --query-info). We combine substituter invocations to reduce overhead. */ void waitForInfo(GoalPtr goal, Path sub, Path storePath); + + /* Wait for any goal to finish. Pretty indiscriminate way to + wait for some resource that some other goal is holding. */ + void waitForAnyGoal(GoalPtr goal); /* Loop until the specified top-level goals have finished. */ void run(const Goals & topGoals); @@ -642,7 +649,7 @@ private: void buildDone(); /* Is the build hook willing to perform the build? */ - typedef enum {rpAccept, rpDecline, rpPostpone, rpDone} HookReply; + typedef enum {rpAccept, rpDecline, rpPostpone, rpDone, rpRestart} HookReply; HookReply tryBuildHook(); /* Synchronously wait for a build hook to finish. */ @@ -651,9 +658,13 @@ private: /* Acquires locks on the output paths and gathers information about the build (e.g., the input closures). During this process its possible that we find out that the build is - unnecessary, in which case we return false (this is not an - error condition!). */ - bool prepareBuild(); + unnecessary, in which case we return prDone. It's also + possible that some other goal is already building/substituting + the output paths, in which case we return prRestart (go back to + the haveDerivation() state). Otherwise, prProceed is + returned. */ + typedef enum {prProceed, prDone, prRestart} PrepareBuildReply; + PrepareBuildReply prepareBuild(); /* Start building a derivation. */ void startBuilder(); @@ -791,6 +802,20 @@ void DerivationGoal::haveDerivation() return; } + /* If this is a fixed-output derivation, it is possible that some + other goal is already building the output paths. (The case + where some other process is building it is handled through + normal locking mechanisms.) So if any output paths are already + being built, put this goal to sleep. */ + for (PathSet::iterator i = invalidOutputs.begin(); + i != invalidOutputs.end(); ++i) + if (pathIsLockedByMe(*i)) { + /* Wait until any goal finishes (hopefully the one that is + locking *i), then retry haveDerivation(). */ + worker.waitForAnyGoal(shared_from_this()); + return; + } + /* We are first going to try to create the invalid output paths through substitutes. If that doesn't work, we'll build them. */ @@ -884,6 +909,12 @@ void DerivationGoal::tryToBuild() /* Somebody else did it. */ amDone(ecSuccess); return; + case rpRestart: + /* Somebody else is building this output path. + Restart from haveDerivation(). */ + state = &DerivationGoal::haveDerivation; + worker.waitForAnyGoal(shared_from_this()); + return; } /* Make sure that we are allowed to start a build. */ @@ -894,9 +925,14 @@ void DerivationGoal::tryToBuild() /* Acquire locks and such. If we then see that the build has been done by somebody else, we're done. */ - if (!prepareBuild()) { + PrepareBuildReply preply = prepareBuild(); + if (preply == prDone) { amDone(ecSuccess); return; + } else if (preply == prRestart) { + state = &DerivationGoal::haveDerivation; + worker.waitForAnyGoal(shared_from_this()); + return; } /* Okay, we have to build. */ @@ -1176,11 +1212,12 @@ DerivationGoal::HookReply DerivationGoal::tryBuildHook() /* Acquire locks and such. If we then see that the output paths are now valid, we're done. */ - if (!prepareBuild()) { + PrepareBuildReply preply = prepareBuild(); + if (preply == prDone || preply == prRestart) { /* Tell the hook to exit. */ writeLine(toHook.writeSide, "cancel"); terminateBuildHook(); - return rpDone; + return preply == prDone ? rpDone : rpRestart; } printMsg(lvlInfo, format("running hook to build path(s) %1%") @@ -1256,8 +1293,20 @@ void DerivationGoal::terminateBuildHook(bool kill) } -bool DerivationGoal::prepareBuild() +DerivationGoal::PrepareBuildReply DerivationGoal::prepareBuild() { + /* Check for the possibility that some other goal in this process + has locked the output since we checked in haveDerivation(). + (It can't happen between here and the lockPaths() call below + because we're not allowing multi-threading.) */ + for (DerivationOutputs::iterator i = drv.outputs.begin(); + i != drv.outputs.end(); ++i) + if (pathIsLockedByMe(i->second.path)) { + debug(format("restarting derivation `%1%' because `%2%' is locked by another goal") + % drvPath % i->second.path); + return prRestart; + } + /* Obtain locks on all output paths. The locks are automatically released when we exit this function or Nix crashes. */ /* !!! BUG: this could block, which is not allowed. */ @@ -1276,7 +1325,7 @@ bool DerivationGoal::prepareBuild() debug(format("skipping build of derivation `%1%', someone beat us to it") % drvPath); outputLocks.setDeletion(true); - return false; + return prDone; } if (validPaths.size() > 0) { @@ -1341,7 +1390,7 @@ bool DerivationGoal::prepareBuild() allPaths.insert(inputPaths.begin(), inputPaths.end()); - return true; + return prProceed; } @@ -2028,6 +2077,16 @@ void SubstitutionGoal::tryToRun() return; } + /* Maybe a derivation goal has already locked this path + (exceedingly unlikely, since it should have used a substitute + first, but let's be defensive). */ + if (pathIsLockedByMe(storePath)) { + debug(format("restarting substitution of `%1%' because it's locked by another goal") + % storePath); + worker.waitForAnyGoal(shared_from_this()); + return; /* restart in the tryToRun() state when another goal finishes */ + } + /* Acquire a lock on the output path. */ outputLock = boost::shared_ptr(new PathLocks); outputLock->lockPaths(singleton(storePath), @@ -2251,6 +2310,16 @@ void Worker::removeGoal(GoalPtr goal) if (goal->getExitCode() == Goal::ecFailed && !keepGoing) topGoals.clear(); } + + /* Wake up goals waiting for any goal to finish. */ + for (WeakGoals::iterator i = waitingForAnyGoal.begin(); + i != waitingForAnyGoal.end(); ++i) + { + GoalPtr goal = i->lock(); + if (goal) wakeUp(goal); + } + + waitingForAnyGoal.clear(); } @@ -2337,6 +2406,13 @@ void Worker::waitForInfo(GoalPtr goal, Path sub, Path storePath) } +void Worker::waitForAnyGoal(GoalPtr goal) +{ + debug("wait for any goal"); + waitingForAnyGoal.insert(goal); +} + + void Worker::getInfo() { for (map::iterator i = requestedInfo.begin(); diff --git a/src/libstore/pathlocks.cc b/src/libstore/pathlocks.cc index 821d4d02fe..9d582206db 100644 --- a/src/libstore/pathlocks.cc +++ b/src/libstore/pathlocks.cc @@ -223,5 +223,12 @@ void PathLocks::setDeletion(bool deletePaths) this->deletePaths = deletePaths; } + +bool pathIsLockedByMe(const Path & path) +{ + Path lockPath = path + ".lock"; + return lockedPaths.find(lockPath) != lockedPaths.end(); +} + } diff --git a/src/libstore/pathlocks.hh b/src/libstore/pathlocks.hh index 87bb7bf3ef..0ca1ce6878 100644 --- a/src/libstore/pathlocks.hh +++ b/src/libstore/pathlocks.hh @@ -40,6 +40,9 @@ public: }; +bool pathIsLockedByMe(const Path & path); + + }