download-from-binary-cache: Use HEAD requests if possible

In "nix-env -qas", we don't need the substitute info, we just need to
know if it exists.  This can be done using a HTTP HEAD request, which
saves bandwidth.

Note however that curl currently has a bug that prevents it from
reusing HTTP connections if HEAD requests return a 404:

https://sourceforge.net/tracker/?func=detail&aid=3542731&group_id=976&atid=100976

Without the patch attached to the issue, using HEAD is actually quite
a bit slower than GET.
This commit is contained in:
Eelco Dolstra 2012-07-11 17:53:20 -04:00
parent 09a6321aeb
commit b74d92755d
1 changed files with 79 additions and 12 deletions

View File

@ -18,7 +18,7 @@ my @binaryCacheUrls = map { s/\/+$//; $_ } split(/ /,
my $maxParallelRequests = int($Nix::Config::config{"binary-caches-parallel-connections"} // 150);
$maxParallelRequests = 1 if $maxParallelRequests < 1;
my ($dbh, $insertNAR, $queryNAR, $insertNegativeNAR, $queryNegativeNAR);
my ($dbh, $insertNAR, $queryNAR, $insertNARExistence, $queryNARExistence);
my %cacheIds;
my $curlm = WWW::Curl::Multi->new;
@ -30,7 +30,7 @@ my $caBundle = $ENV{"CURL_CA_BUNDLE"} // $ENV{"OPENSSL_X509_CERT_FILE"};
sub addRequest {
my ($storePath, $url) = @_;
my ($storePath, $url, $head) = @_;
my $curl = WWW::Curl::Easy->new;
my $curlId = $curlIdCount++;
@ -41,6 +41,7 @@ sub addRequest {
$curl->setopt(CURLOPT_WRITEDATA, \$requests{$curlId}->{content});
$curl->setopt(CURLOPT_FOLLOWLOCATION, 1);
$curl->setopt(CURLOPT_CAINFO, $caBundle) if defined $caBundle;
$curl->setopt(CURLOPT_NOBODY, 1) if $head;
if ($activeRequests >= $maxParallelRequests) {
$scheduled{$curlId} = 1;
@ -127,9 +128,10 @@ EOF
EOF
$dbh->do(<<EOF);
create table if not exists NegativeNARs (
create table if not exists NARExistence (
cache integer not null,
storePath text not null,
exist integer not null,
timestamp integer not null,
primary key (cache, storePath),
foreign key (cache) references BinaryCaches(id) on delete cascade
@ -142,17 +144,28 @@ EOF
$queryNAR = $dbh->prepare("select * from NARs where cache = ? and storePath = ?") or die;
$insertNegativeNAR = $dbh->prepare(
"insert or replace into NegativeNARs(cache, storePath, timestamp) values (?, ?, ?)") or die;
$insertNARExistence = $dbh->prepare(
"insert or replace into NARExistence(cache, storePath, exist, timestamp) values (?, ?, ?, ?)") or die;
$queryNegativeNAR = $dbh->prepare("select 1 from NegativeNARs where cache = ? and storePath = ?") or die;
$queryNARExistence = $dbh->prepare("select exist from NARExistence where cache = ? and storePath = ?") or die;
}
sub negativeHit {
my ($storePath, $binaryCacheUrl) = @_;
$queryNegativeNAR->execute(getCacheId($binaryCacheUrl), basename($storePath));
return @{$queryNegativeNAR->fetchall_arrayref()} != 0;
$queryNARExistence->execute(getCacheId($binaryCacheUrl), basename($storePath));
my $res = $queryNARExistence->fetchrow_hashref();
return defined $res && $res->{exist} == 0;
}
sub positiveHit {
my ($storePath, $binaryCacheUrl) = @_;
return 1 if defined getCachedInfoFrom($storePath, $binaryCacheUrl);
$queryNARExistence->execute(getCacheId($binaryCacheUrl), basename($storePath));
my $res = $queryNARExistence->fetchrow_hashref();
return defined $res && $res->{exist} == 1;
}
@ -166,7 +179,7 @@ sub processNARInfo {
print STDERR "could not download $request->{url} (" .
($request->{result} != 0 ? "Curl error $request->{result}" : "HTTP status $request->{httpStatus}") . ")\n";
} else {
$insertNegativeNAR->execute($cacheId, basename($storePath), time());
$insertNARExistence->execute($cacheId, basename($storePath), 0, time());
}
return undef;
}
@ -319,6 +332,61 @@ sub printInfoParallel {
}
sub printSubstitutablePaths {
my @paths = @_;
# First look for paths that have cached info.
my @left;
foreach my $storePath (@paths) {
my $found = 0;
foreach my $binaryCacheUrl (@binaryCacheUrls) {
if (positiveHit($storePath, $binaryCacheUrl)) {
print "$storePath\n";
$found = 1;
last;
}
}
push @left, $storePath if !$found;
}
return if scalar @left == 0;
# For remaining paths, do HEAD requests.
foreach my $binaryCacheUrl (@binaryCacheUrls) {
my $cacheId = getCacheId($binaryCacheUrl);
my @left2;
%requests = ();
foreach my $storePath (@left) {
if (negativeHit($storePath, $binaryCacheUrl)) {
push @left2, $storePath;
next;
}
addRequest($storePath, infoUrl($binaryCacheUrl, $storePath), 1);
}
processRequests;
foreach my $request (values %requests) {
if ($request->{result} != 0 || $request->{httpStatus} != 200) {
if ($request->{httpStatus} != 404) {
print STDERR "could not check $request->{url} (" .
($request->{result} != 0 ? "Curl error $request->{result}" : "HTTP status $request->{httpStatus}") . ")\n";
} else {
$insertNARExistence->execute($cacheId, basename($request->{storePath}), 0, time());
}
push @left2, $request->{storePath};
} else {
$insertNARExistence->execute($cacheId, basename($request->{storePath}), 1, time());
print "$request->{storePath}\n";
}
}
@left = @left2;
}
}
sub downloadBinary {
my ($storePath) = @_;
@ -371,9 +439,8 @@ if ($ARGV[0] eq "--query") {
my ($cmd, @args) = split " ", $_;
if ($cmd eq "have") {
my $storePath = <STDIN>; chomp $storePath;
# FIXME: want to give correct info here, but it's too slow.
print "0\n";
printSubstitutablePaths(@args);
print "\n";
}
elsif ($cmd eq "info") {