download-from-binary-cache: use WWW::Curl

Using WWW::Curl rather than running an external curl process for every NAR info file halves the time it takes to get info thanks to libcurl's support for persistent HTTP connections. (We save a roundtrip per file.) But the real gain will come from using parallel and/or pipelined requests.
2012-07-06 00:30:40 -04:00 · 2012-07-06 00:30:40 -04:00 · cd94665f38
parent ae60643c15
commit cd94665f38
1 changed files with 68 additions and 11 deletions
--- a/scripts/download-from-binary-cache.pl.in
+++ b/scripts/download-from-binary-cache.pl.in
@ -1,10 +1,13 @@
 #! @perl@ -w @perlFlags@

-use strict;
+use DBI;
 use File::Basename;
+use IO::Select;
 use Nix::Config;
 use Nix::Store;
-use DBI;
+use WWW::Curl::Easy;
+use WWW::Curl::Multi;
+use strict;


 my @binaryCacheUrls = map { s/\/+$//; $_ } split(/ /, ($ENV{"NIX_BINARY_CACHES"} || ""));
@ -12,6 +15,58 @@ my @binaryCacheUrls = map { s/\/+$//; $_ } split(/ /, ($ENV{"NIX_BINARY_CACHES"}
 my ($dbh, $insertNAR, $queryNAR, $insertNegativeNAR, $queryNegativeNAR);
 my %cacheIds;

+my $curlm = WWW::Curl::Multi->new;
+my $activeRequests = 0;
+my $curlIdCount = 1;
+my %curlHandles;
+my $caBundle = $ENV{"CURL_CA_BUNDLE"} || $ENV{"OPENSSL_X509_CERT_FILE"};
+
+
+sub addRequest {
+    my ($url) = @_;
+    
+    my $curl = WWW::Curl::Easy->new;
+    my $curlId = $curlIdCount++;
+    $curlHandles{$curlId} = { handle => $curl, content => "" };
+
+    $curl->setopt(CURLOPT_PRIVATE, $curlId);
+    $curl->setopt(CURLOPT_URL, $url);
+    $curl->setopt(CURLOPT_WRITEDATA, \$curlHandles{$curlId}->{content});
+    $curl->setopt(CURLOPT_FOLLOWLOCATION, 1);
+    $curl->setopt(CURLOPT_CAINFO, $caBundle) if defined $caBundle;
+
+    $curlm->add_handle($curl);
+    $activeRequests++;
+
+    return $curlHandles{$curlId};
+}
+
+
+sub processRequests {
+    while ($activeRequests) {
+        my ($rfds, $wfds, $efds) = $curlm->fdset();
+        #print STDERR "R = @{$rfds}, W = @{$wfds}, E = @{$efds}\n";
+
+        # Sleep until we can read or write some data.
+        if (scalar @{$rfds} + scalar @{$wfds} + scalar @{$efds} > 0) {
+            IO::Select->select(IO::Select->new(@{$rfds}), IO::Select->new(@{$wfds}), IO::Select->new(@{$efds}), 0.1);
+        }
+        
+        if ($curlm->perform() != $activeRequests) {
+            while (my ($id, $result) = $curlm->info_read) {
+                if ($id) {
+                    my $handle = $curlHandles{$id}->{handle};
+                    $curlHandles{$id}->{result} = $result;
+                    $curlHandles{$id}->{httpStatus} = $handle->getinfo(CURLINFO_HTTP_CODE);
+                    #print STDERR "\nRequest completed ($id, $result, $curlHandles{$id}->{httpStatus})\n";
+                    $activeRequests--;
+                    delete $curlHandles{$id}->{handle};
+                }
+            }
+        }
+    }
+}
+

 sub initCache {
    my $dbPath = "$Nix::Config::stateDir/binary-cache-v1.sqlite";
@ -86,11 +141,13 @@ sub getInfoFrom {
    
    my $infoUrl = "$binaryCacheUrl/$pathHash.narinfo";
    print STDERR "checking $infoUrl...\n";
-    my $s = `$Nix::Config::curl --fail --silent --location $infoUrl`;
-    if ($? != 0) {
-        my $status = $? >> 8;
-        if ($status != 22 && $status != 37) {
-            print STDERR "could not download ‘$infoUrl’ (curl returned status ", $? >> 8, ")\n";
+    my $request = addRequest($infoUrl);
+    processRequests;
+
+    if ($request->{result} != 0 || $request->{httpStatus} != 200) {
+        if ($request->{httpStatus} != 404) {
+            print STDERR "could not download ‘$infoUrl’ (" .
+                ($request->{result} != 0 ? "Curl error $request->{result}" : "HTTP status $request->{httpStatus}") . ")\n";
        } else {
            $insertNegativeNAR->execute($cacheId, basename($storePath), time());
        }
@ -100,7 +157,7 @@ sub getInfoFrom {
    my ($storePath2, $url, $fileHash, $fileSize, $narHash, $narSize, $deriver, $system);
    my $compression = "bzip2";
    my @refs;
-    foreach my $line (split "\n", $s) {
+    foreach my $line (split "\n", $request->{content}) {
        $line =~ /^(.*): (.*)$/ or return undef;
        if ($1 eq "StorePath") { $storePath2 = $2; }
        elsif ($1 eq "URL") { $url = $2; }
@ -248,9 +305,9 @@ if ($ARGV[0] eq "--query") {
        if ($cmd eq "have") {
            my $storePath = <STDIN>; chomp $storePath;
            # FIXME: want to give correct info here, but it's too slow.
-            print "0\n";
-            #my $info = getInfo($storePath);
-            #if (defined $info) { print "1\n"; } else { print "0\n"; }
+            #print "0\n";
+            my $info = getInfo($storePath);
+            if (defined $info) { print "1\n"; } else { print "0\n"; }
        }

        elsif ($cmd eq "info") {