diff --git a/scripts/download-from-binary-cache.pl.in b/scripts/download-from-binary-cache.pl.in index ea37c818d3..a67818e7f5 100644 --- a/scripts/download-from-binary-cache.pl.in +++ b/scripts/download-from-binary-cache.pl.in @@ -12,33 +12,40 @@ use strict; my @binaryCacheUrls = map { s/\/+$//; $_ } split(/ /, ($ENV{"NIX_BINARY_CACHES"} || "")); +my $maxParallelRequests = 150; + my ($dbh, $insertNAR, $queryNAR, $insertNegativeNAR, $queryNegativeNAR); my %cacheIds; my $curlm = WWW::Curl::Multi->new; my $activeRequests = 0; my $curlIdCount = 1; -my %curlHandles; +my %requests; +my %scheduled; my $caBundle = $ENV{"CURL_CA_BUNDLE"} || $ENV{"OPENSSL_X509_CERT_FILE"}; sub addRequest { - my ($url) = @_; + my ($storePath, $url) = @_; my $curl = WWW::Curl::Easy->new; my $curlId = $curlIdCount++; - $curlHandles{$curlId} = { handle => $curl, content => "" }; + $requests{$curlId} = { storePath => $storePath, url => $url, handle => $curl, content => "" }; $curl->setopt(CURLOPT_PRIVATE, $curlId); $curl->setopt(CURLOPT_URL, $url); - $curl->setopt(CURLOPT_WRITEDATA, \$curlHandles{$curlId}->{content}); + $curl->setopt(CURLOPT_WRITEDATA, \$requests{$curlId}->{content}); $curl->setopt(CURLOPT_FOLLOWLOCATION, 1); $curl->setopt(CURLOPT_CAINFO, $caBundle) if defined $caBundle; - $curlm->add_handle($curl); - $activeRequests++; + if ($activeRequests >= $maxParallelRequests) { + $scheduled{$curlId} = 1; + } else { + $curlm->add_handle($curl); + $activeRequests++; + } - return $curlHandles{$curlId}; + return $requests{$curlId}; } @@ -55,12 +62,20 @@ sub processRequests { if ($curlm->perform() != $activeRequests) { while (my ($id, $result) = $curlm->info_read) { if ($id) { - my $handle = $curlHandles{$id}->{handle}; - $curlHandles{$id}->{result} = $result; - $curlHandles{$id}->{httpStatus} = $handle->getinfo(CURLINFO_HTTP_CODE); - #print STDERR "\nRequest completed ($id, $result, $curlHandles{$id}->{httpStatus})\n"; + my $handle = $requests{$id}->{handle}; + $requests{$id}->{result} = $result; + $requests{$id}->{httpStatus} = $handle->getinfo(CURLINFO_HTTP_CODE); + #print STDERR "\nRequest completed ($id, $result, $requests{$id}->{httpStatus})\n"; $activeRequests--; - delete $curlHandles{$id}->{handle}; + delete $requests{$id}->{handle}; + + if (scalar(keys %scheduled) > 0) { + my $id2 = (keys %scheduled)[0]; + $curlm->add_handle($requests{$id2}->{handle}); + $activeRequests++; + delete $scheduled{$id2}; + } + } } } @@ -130,23 +145,21 @@ EOF } -sub getInfoFrom { - my ($storePath, $pathHash, $binaryCacheUrl) = @_; +sub negativeHit { + my ($storePath, $binaryCacheUrl) = @_; + $queryNegativeNAR->execute(getCacheId($binaryCacheUrl), basename($storePath)); + return @{$queryNegativeNAR->fetchall_arrayref()} != 0; +} - my $cacheId = getCacheId($binaryCacheUrl); - # Bail out if there is a negative cache entry. - $queryNegativeNAR->execute($cacheId, basename($storePath)); - return undef if @{$queryNegativeNAR->fetchall_arrayref()} != 0; +sub processNARInfo { + my ($storePath, $binaryCacheUrl, $request) = @_; - my $infoUrl = "$binaryCacheUrl/$pathHash.narinfo"; - print STDERR "checking $infoUrl...\n"; - my $request = addRequest($infoUrl); - processRequests; + my $cacheId = getCacheId($binaryCacheUrl); if ($request->{result} != 0 || $request->{httpStatus} != 200) { if ($request->{httpStatus} != 404) { - print STDERR "could not download ‘$infoUrl’ (" . + print STDERR "could not download ‘$request->{url}’ (" . ($request->{result} != 0 ? "Curl error $request->{result}" : "HTTP status $request->{httpStatus}") . ")\n"; } else { $insertNegativeNAR->execute($cacheId, basename($storePath), time()); @@ -172,7 +185,7 @@ sub getInfoFrom { } return undef if $storePath ne $storePath2; if ($storePath ne $storePath2 || !defined $url || !defined $narHash) { - print STDERR "bad NAR info file ‘$infoUrl’\n"; + print STDERR "bad NAR info file ‘$request->{url}’\n"; return undef; } @@ -236,24 +249,65 @@ sub cachedGetInfoFrom { } -sub getInfo { - my ($storePath) = @_; +sub printInfo { + my ($storePath, $info) = @_; + print "$storePath\n"; + print $info->{deriver} ? "$Nix::Config::storeDir/$info->{deriver}" : "", "\n"; + print scalar @{$info->{refs}}, "\n"; + print "$Nix::Config::storeDir/$_\n" foreach @{$info->{refs}}; + print $info->{fileSize} || 0, "\n"; + print $info->{narSize} || 0, "\n"; +} - my $pathHash = substr(basename($storePath), 0, 32); - # First look if we have cached info for one of the URLs. - foreach my $binaryCacheUrl (@binaryCacheUrls) { - my $info = cachedGetInfoFrom($storePath, $pathHash, $binaryCacheUrl); - return $info if defined $info; +sub printInfoParallel { + my @paths = @_; + + # First print all paths for which we have cached info. + my @left; + foreach my $storePath (@paths) { + my $pathHash = substr(basename($storePath), 0, 32); + my $found = 0; + foreach my $binaryCacheUrl (@binaryCacheUrls) { + my $info = cachedGetInfoFrom($storePath, $pathHash, $binaryCacheUrl); + if (defined $info) { + printInfo($storePath, $info); + $found = 1; + last; + } + } + push @left, $storePath if !$found; } - # No, so do an HTTP request until we get a hit. - foreach my $binaryCacheUrl (@binaryCacheUrls) { - my $info = getInfoFrom($storePath, $pathHash, $binaryCacheUrl); - return $info if defined $info; - } + return if scalar @left == 0; - return undef; + foreach my $binaryCacheUrl (@binaryCacheUrls) { + + my @left2; + %requests = (); + foreach my $storePath (@left) { + my $pathHash = substr(basename($storePath), 0, 32); + if (negativeHit($storePath, $binaryCacheUrl)) { + push @left2, $storePath; + next; + } + my $infoUrl = "$binaryCacheUrl/$pathHash.narinfo"; + addRequest($storePath, $infoUrl); + } + + processRequests; + + foreach my $request (values %requests) { + my $info = processNARInfo($request->{storePath}, $binaryCacheUrl, $request); + if (defined $info) { + printInfo($request->{storePath}, $info); + } else { + push @left2, $request->{storePath}; + } + } + + @left = @left2; + } } @@ -264,30 +318,37 @@ sub downloadBinary { cache: foreach my $binaryCacheUrl (@binaryCacheUrls) { my $info = cachedGetInfoFrom($storePath, $pathHash, $binaryCacheUrl); - $info = getInfoFrom($storePath, $pathHash, $binaryCacheUrl) unless defined $info; - if (defined $info) { - my $decompressor; - if ($info->{compression} eq "bzip2") { $decompressor = "$Nix::Config::bzip2 -d"; } - elsif ($info->{compression} eq "xz") { $decompressor = "$Nix::Config::xz -d"; } - else { - print STDERR "unknown compression method ‘$info->{compression}’\n"; - next; - } - print STDERR "\n*** Downloading ‘$info->{url}’ into ‘$storePath’...\n"; - if (system("$Nix::Config::curl --fail --location $binaryCacheUrl/$info->{url} | $decompressor | $Nix::Config::binDir/nix-store --restore $storePath") != 0) { - die "download of `$info->{url}' failed" . ($! ? ": $!" : "") . "\n" unless $? == 0; - next; - } - # The hash in the manifest can be either in base-16 or - # base-32. Handle both. - $info->{narHash} =~ /^sha256:(.*)$/ or die "invalid hash"; - my $hash = $1; - my $hash2 = hashPath("sha256", 1, $storePath); - die "hash mismatch in downloaded path ‘$storePath’; expected $hash, got $hash2\n" - if $hash ne $hash2; - print STDERR "\n"; - return 1; + + unless (defined $info) { + next if negativeHit($storePath, $binaryCacheUrl); + my $request = addRequest($storePath, "$binaryCacheUrl/$pathHash.narinfo"); + processRequests; + $info = processNARInfo($storePath, $binaryCacheUrl, $request); } + + next unless defined $info; + + my $decompressor; + if ($info->{compression} eq "bzip2") { $decompressor = "$Nix::Config::bzip2 -d"; } + elsif ($info->{compression} eq "xz") { $decompressor = "$Nix::Config::xz -d"; } + else { + print STDERR "unknown compression method ‘$info->{compression}’\n"; + next; + } + print STDERR "\n*** Downloading ‘$info->{url}’ into ‘$storePath’...\n"; + if (system("$Nix::Config::curl --fail --location $binaryCacheUrl/$info->{url} | $decompressor | $Nix::Config::binDir/nix-store --restore $storePath") != 0) { + die "download of `$info->{url}' failed" . ($! ? ": $!" : "") . "\n" unless $? == 0; + next; + } + # The hash in the manifest can be either in base-16 or + # base-32. Handle both. + $info->{narHash} =~ /^sha256:(.*)$/ or die "invalid hash"; + my $hash = $1; + my $hash2 = hashPath("sha256", 1, $storePath); + die "hash mismatch in downloaded path ‘$storePath’; expected $hash, got $hash2\n" + if $hash ne $hash2; + print STDERR "\n"; + return 1; } return 0; @@ -300,29 +361,20 @@ initCache(); if ($ARGV[0] eq "--query") { while () { - my $cmd = $_; chomp $cmd; - + chomp; + my ($cmd, @args) = split " ", $_; + if ($cmd eq "have") { my $storePath = ; chomp $storePath; # FIXME: want to give correct info here, but it's too slow. - #print "0\n"; - my $info = getInfo($storePath); - if (defined $info) { print "1\n"; } else { print "0\n"; } + print "0\n"; + #my $info = getInfo($storePath); + #if (defined $info) { print "1\n"; } else { print "0\n"; } } elsif ($cmd eq "info") { - my $storePath = ; chomp $storePath; - my $info = getInfo($storePath); - if (defined $info) { - print "1\n"; - print $info->{deriver} ? "$Nix::Config::storeDir/$info->{deriver}" : "", "\n"; - print scalar @{$info->{refs}}, "\n"; - print "$Nix::Config::storeDir/$_\n" foreach @{$info->{refs}}; - print $info->{fileSize} || 0, "\n"; - print $info->{narSize} || 0, "\n"; - } else { - print "0\n"; - } + printInfoParallel(@args); + print "\n"; } else { die "unknown command `$cmd'"; } diff --git a/src/libstore/build.cc b/src/libstore/build.cc index d5bbd540b3..1c84e5b9f9 100644 --- a/src/libstore/build.cc +++ b/src/libstore/build.cc @@ -2352,10 +2352,12 @@ void SubstitutionGoal::tryNext() sub = subs.front(); subs.pop_front(); - if (!worker.store.querySubstitutablePathInfo(sub, storePath, info)) { - tryNext(); - return; - } + SubstitutablePathInfos infos; + PathSet dummy(singleton(storePath)); + worker.store.querySubstitutablePathInfos(sub, dummy, infos); + SubstitutablePathInfos::iterator k = infos.find(storePath); + if (k == infos.end()) { tryNext(); return; } + info = k->second; /* To maintain the closure invariant, we first have to realise the paths referenced by this one. */ diff --git a/src/libstore/globals.cc b/src/libstore/globals.cc index 5c22f14066..f660ed68df 100644 --- a/src/libstore/globals.cc +++ b/src/libstore/globals.cc @@ -155,8 +155,9 @@ void setDefaultsFromEnvironment() string subs = getEnv("NIX_SUBSTITUTERS", "default"); if (subs == "default") { - substituters.push_back(nixLibexecDir + "/nix/substituters/copy-from-other-stores.pl"); - substituters.push_back(nixLibexecDir + "/nix/substituters/download-using-manifests.pl"); + //substituters.push_back(nixLibexecDir + "/nix/substituters/copy-from-other-stores.pl"); + //substituters.push_back(nixLibexecDir + "/nix/substituters/download-using-manifests.pl"); + substituters.push_back(nixLibexecDir + "/nix/substituters/download-from-binary-cache.pl"); } else substituters = tokenizeString(subs, ":"); diff --git a/src/libstore/local-store.cc b/src/libstore/local-store.cc index 1ce62aeafc..b4ea4b7481 100644 --- a/src/libstore/local-store.cc +++ b/src/libstore/local-store.cc @@ -936,37 +936,57 @@ bool LocalStore::hasSubstitutes(const Path & path) } -bool LocalStore::querySubstitutablePathInfo(const Path & substituter, - const Path & path, SubstitutablePathInfo & info) +void LocalStore::querySubstitutablePathInfos(const Path & substituter, + PathSet & paths, SubstitutablePathInfos & infos) { RunningSubstituter & run(runningSubstituters[substituter]); startSubstituter(substituter, run); - writeLine(run.to, "info\n" + path); + string s = "info "; + foreach (PathSet::const_iterator, i, paths) + if (infos.find(*i) == infos.end()) { s += *i; s += " "; } + writeLine(run.to, s); - if (!getIntLine(run.from)) return false; - - info.deriver = readLine(run.from); - if (info.deriver != "") assertStorePath(info.deriver); - int nrRefs = getIntLine(run.from); - while (nrRefs--) { - Path p = readLine(run.from); - assertStorePath(p); - info.references.insert(p); + while (true) { + Path path = readLine(run.from); + if (path == "") break; + assert(paths.find(path) != paths.end()); + paths.erase(path); + SubstitutablePathInfo & info(infos[path]); + info.deriver = readLine(run.from); + if (info.deriver != "") assertStorePath(info.deriver); + int nrRefs = getIntLine(run.from); + while (nrRefs--) { + Path p = readLine(run.from); + assertStorePath(p); + info.references.insert(p); + } + info.downloadSize = getIntLine(run.from); + info.narSize = getIntLine(run.from); } - info.downloadSize = getIntLine(run.from); - info.narSize = getIntLine(run.from); - - return true; } bool LocalStore::querySubstitutablePathInfo(const Path & path, SubstitutablePathInfo & info) { - foreach (Paths::iterator, i, substituters) - if (querySubstitutablePathInfo(*i, path, info)) return true; - return false; + SubstitutablePathInfos infos; + querySubstitutablePathInfos(singleton(path), infos); + SubstitutablePathInfos::iterator i = infos.find(path); + if (i == infos.end()) return false; + info = i->second; + return true; +} + + +void LocalStore::querySubstitutablePathInfos(const PathSet & paths, + SubstitutablePathInfos & infos) +{ + PathSet todo = paths; + foreach (Paths::iterator, i, substituters) { + if (todo.empty()) break; + querySubstitutablePathInfos(*i, todo, infos); + } } @@ -1110,7 +1130,7 @@ Path LocalStore::addToStore(const Path & _srcPath, method for very large paths, but `copyPath' is mainly used for small files. */ StringSink sink; - if (recursive) + if (recursive) dumpPath(srcPath, sink, filter); else sink.s = readFile(srcPath); diff --git a/src/libstore/local-store.hh b/src/libstore/local-store.hh index aa8e8582fb..c4d8be692a 100644 --- a/src/libstore/local-store.hh +++ b/src/libstore/local-store.hh @@ -128,8 +128,11 @@ public: bool querySubstitutablePathInfo(const Path & path, SubstitutablePathInfo & info); - bool querySubstitutablePathInfo(const Path & substituter, - const Path & path, SubstitutablePathInfo & info); + void querySubstitutablePathInfos(const Path & substituter, + PathSet & paths, SubstitutablePathInfos & infos); + + void querySubstitutablePathInfos(const PathSet & paths, + SubstitutablePathInfos & infos); Path addToStore(const Path & srcPath, bool recursive = true, HashType hashAlgo = htSHA256, diff --git a/src/libstore/misc.cc b/src/libstore/misc.cc index 0934999363..aa5f6ff727 100644 --- a/src/libstore/misc.cc +++ b/src/libstore/misc.cc @@ -55,45 +55,97 @@ void queryMissing(StoreAPI & store, const PathSet & targets, PathSet todo(targets.begin(), targets.end()), done; - while (!todo.empty()) { - Path p = *(todo.begin()); - todo.erase(p); - if (done.find(p) != done.end()) continue; - done.insert(p); + bool useSubstitutes = queryBoolSetting("build-use-substitutes", true); - if (isDerivation(p)) { - if (!store.isValidPath(p)) { - unknown.insert(p); - continue; + /* Getting substitute info has high latency when using the binary + cache substituter. Thus it's essential to do substitute + queries in parallel as much as possible. To accomplish this + we do the following: + + - For all paths still to be processed (‘todo’), we add all + paths for which we need info to the set ‘query’. For an + unbuilt derivation this is the output paths; otherwise, it's + the path itself. + + - We get info about all paths in ‘query’ in parallel. + + - We process the results and add new items to ‘todo’ if + necessary. E.g. if a path is substitutable, then we need to + get info on its references. + + - Repeat until ‘todo’ is empty. + */ + + while (!todo.empty()) { + + PathSet query, todoDrv, todoNonDrv; + + foreach (PathSet::iterator, i, todo) { + if (done.find(*i) != done.end()) continue; + done.insert(*i); + + if (isDerivation(*i)) { + if (!store.isValidPath(*i)) { + // FIXME: we could try to substitute p. + unknown.insert(*i); + continue; + } + Derivation drv = derivationFromPath(store, *i); + + PathSet invalid; + foreach (DerivationOutputs::iterator, j, drv.outputs) + if (!store.isValidPath(j->second.path)) invalid.insert(j->second.path); + if (invalid.empty()) continue; + + todoDrv.insert(*i); + if (useSubstitutes) query.insert(invalid.begin(), invalid.end()); } - Derivation drv = derivationFromPath(store, p); + + else { + if (store.isValidPath(*i)) continue; + query.insert(*i); + todoNonDrv.insert(*i); + } + } + + todo.clear(); + + SubstitutablePathInfos infos; + store.querySubstitutablePathInfos(query, infos); + + foreach (PathSet::iterator, i, todoDrv) { + // FIXME: cache this + Derivation drv = derivationFromPath(store, *i); bool mustBuild = false; - foreach (DerivationOutputs::iterator, i, drv.outputs) - if (!store.isValidPath(i->second.path) && - !(queryBoolSetting("build-use-substitutes", true) && store.hasSubstitutes(i->second.path))) - mustBuild = true; + if (useSubstitutes) { + foreach (DerivationOutputs::iterator, j, drv.outputs) + if (!store.isValidPath(j->second.path) && + infos.find(j->second.path) == infos.end()) + mustBuild = true; + } else + mustBuild = true; if (mustBuild) { - willBuild.insert(p); + willBuild.insert(*i); todo.insert(drv.inputSrcs.begin(), drv.inputSrcs.end()); foreach (DerivationInputs::iterator, i, drv.inputDrvs) todo.insert(i->first); - } else - foreach (DerivationOutputs::iterator, i, drv.outputs) - todo.insert(i->second.path); - } - - else { - if (store.isValidPath(p)) continue; - SubstitutablePathInfo info; - if (store.querySubstitutablePathInfo(p, info)) { - willSubstitute.insert(p); - downloadSize += info.downloadSize; - narSize += info.narSize; - todo.insert(info.references.begin(), info.references.end()); } else - unknown.insert(p); + foreach (DerivationOutputs::iterator, i, drv.outputs) + todoNonDrv.insert(i->second.path); + } + + foreach (PathSet::iterator, i, todoNonDrv) { + done.insert(*i); + SubstitutablePathInfos::iterator info = infos.find(*i); + if (info != infos.end()) { + willSubstitute.insert(*i); + downloadSize += info->second.downloadSize; + narSize += info->second.narSize; + todo.insert(info->second.references.begin(), info->second.references.end()); + } else + unknown.insert(*i); } } } diff --git a/src/libstore/remote-store.cc b/src/libstore/remote-store.cc index 5e5561a6ae..1cf67d3731 100644 --- a/src/libstore/remote-store.cc +++ b/src/libstore/remote-store.cc @@ -256,6 +256,19 @@ bool RemoteStore::querySubstitutablePathInfo(const Path & path, } +void RemoteStore::querySubstitutablePathInfos(const PathSet & paths, + SubstitutablePathInfos & infos) +{ + if (paths.empty()) return; + printMsg(lvlError, format("QUERYING %1% (REMOTE)") % showPaths(paths)); + foreach (PathSet::const_iterator, i, paths) { + SubstitutablePathInfo info; + if (querySubstitutablePathInfo(*i, info)) + infos[*i] = info; + } +} + + ValidPathInfo RemoteStore::queryPathInfo(const Path & path) { openConnection(); diff --git a/src/libstore/remote-store.hh b/src/libstore/remote-store.hh index e9f40da6db..1056a61158 100644 --- a/src/libstore/remote-store.hh +++ b/src/libstore/remote-store.hh @@ -48,6 +48,9 @@ public: bool querySubstitutablePathInfo(const Path & path, SubstitutablePathInfo & info); + void querySubstitutablePathInfos(const PathSet & paths, + SubstitutablePathInfos & infos); + Path addToStore(const Path & srcPath, bool recursive = true, HashType hashAlgo = htSHA256, PathFilter & filter = defaultPathFilter); diff --git a/src/libstore/store-api.hh b/src/libstore/store-api.hh index bf3269f578..92b2ddb1e7 100644 --- a/src/libstore/store-api.hh +++ b/src/libstore/store-api.hh @@ -86,6 +86,8 @@ struct SubstitutablePathInfo unsigned long long narSize; /* 0 = unknown */ }; +typedef std::map SubstitutablePathInfos; + struct ValidPathInfo { @@ -147,6 +149,9 @@ public: substitutable path. */ virtual bool querySubstitutablePathInfo(const Path & path, SubstitutablePathInfo & info) = 0; + + virtual void querySubstitutablePathInfos(const PathSet & paths, + SubstitutablePathInfos & infos) = 0; /* Copy the contents of a path to the store and register the validity the resulting path. The resulting path is returned.