From 5591fcc5292616e99d9d2478ffeb4f1b51f1899e Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Sun, 10 Apr 2011 23:22:46 +0000 Subject: [PATCH] * Cache the manifests in /nix/var/nix/manifests in a SQLite database. This significantly speeds up the download-using-manifests substituter, especially if manifests are very large. For instance, one "nix-build -A geeqie" operation that updated four packages using binary patches went from 18.5s to 1.6s. It also significantly reduces memory use. The cache is kept in /nix/var/nix/manifests/cache.sqlite. It's updated automatically when manifests are added to or removed from /nix/var/nix/manifests. It might be interesting to have nix-pull store manifests directly in the DB, rather than storing them as separate flat files, but then we would need a command line interface to delete manifests from the DB. --- scripts/NixManifest.pm.in | 142 ++++++++++++++++++++++++- scripts/download-using-manifests.pl.in | 52 ++++----- 2 files changed, 164 insertions(+), 30 deletions(-) diff --git a/scripts/NixManifest.pm.in b/scripts/NixManifest.pm.in index 21c8b6ba2a..998c23ef13 100644 --- a/scripts/NixManifest.pm.in +++ b/scripts/NixManifest.pm.in @@ -1,4 +1,7 @@ use strict; +use DBI; +use Cwd; +use File::stat; sub addPatch { @@ -34,7 +37,7 @@ sub readManifest { my $manifestVersion = 2; my ($storePath, $url, $hash, $size, $basePath, $baseHash, $patchType); - my ($narHash, $narSize, $references, $deriver, $hashAlgo, $copyFrom, $system); + my ($narHash, $narSize, $references, $deriver, $copyFrom, $system); while () { chomp; @@ -59,7 +62,6 @@ sub readManifest { undef $system; $references = ""; $deriver = ""; - $hashAlgo = "md5"; } } else { @@ -83,7 +85,7 @@ sub readManifest { { url => $url, hash => $hash, size => $size , narHash => $narHash, narSize => $narSize , references => $references - , deriver => $deriver, hashAlgo => $hashAlgo + , deriver => $deriver , system => $system }; } @@ -95,7 +97,7 @@ sub readManifest { { url => $url, hash => 
$hash, size => $size , basePath => $basePath, baseHash => $baseHash , narHash => $narHash, narSize => $narSize - , patchType => $patchType, hashAlgo => $hashAlgo + , patchType => $patchType }; } @@ -193,4 +195,136 @@ sub writeManifest { } +sub updateManifestDB { + my $manifestDir = ($ENV{"NIX_MANIFESTS_DIR"} or "@localstatedir@/nix/manifests"); + + my $dbPath = "$manifestDir/cache.sqlite"; + + # Open/create the database. + my $dbh = DBI->connect("dbi:SQLite:dbname=$dbPath", "", "") + or die "cannot open database `$dbPath'"; + $dbh->{AutoCommit} = 0; + $dbh->{RaiseError} = 1; + $dbh->{PrintError} = 0; + + $dbh->do("pragma foreign_keys = on"); + + # Initialise the database schema, if necessary. + $dbh->do(<do(<do("create index if not exists NARs_storePath on NARs(storePath)"); + + $dbh->do(<do("create index if not exists Patches_storePath on Patches(storePath)"); + + # !!! locking? + + # Read each manifest in $manifestDir and add it to the database, + # unless we've already done so on a previous run. + my %seen; + + for my $manifest (glob "$manifestDir/*.nixmanifest") { + $manifest = Cwd::abs_path($manifest); + my $timestamp = lstat($manifest)->mtime; + $seen{$manifest} = 1; + + next if scalar @{$dbh->selectcol_arrayref( + "select 1 from Manifests where path = ? and timestamp = ?", + {}, $manifest, $timestamp)} == 1; + + # !!! Insert directly into the DB. 
+ my %narFiles; + my %patches; + my $version = readManifest($manifest, \%narFiles, \%patches); + + if ($version < 3) { + die "you have an old-style manifest `$manifest'; please delete it"; + } + if ($version >= 10) { + die "manifest `$manifest' is too new; please delete it or upgrade Nix"; + } + + $dbh->do("delete from Manifests where path = ?", {}, $manifest); + + $dbh->do("insert into Manifests(path, timestamp) values (?, ?)", + {}, $manifest, $timestamp); + + my $id = $dbh->sqlite_last_insert_rowid(); + + foreach my $storePath (keys %narFiles) { + my $narFileList = $narFiles{$storePath}; + foreach my $narFile (@{$narFiles{$storePath}}) { + $dbh->do( + "insert into NARs(manifest, storePath, url, hash, size, narHash, " . + "narSize, refs, deriver, system) values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", + {}, $id, $storePath, $narFile->{url}, $narFile->{hash}, $narFile->{size}, + $narFile->{narHash}, $narFile->{narSize}, $narFile->{references}, + $narFile->{deriver}, $narFile->{system}); + } + } + + foreach my $storePath (keys %patches) { + my $patchList = $patches{$storePath}; + foreach my $patch (@{$patchList}) { + $dbh->do( + "insert into Patches(manifest, storePath, basePath, baseHash, url, hash, " . + "size, narHash, narSize, patchType) values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", + {}, $id, $storePath, $patch->{basePath}, $patch->{baseHash}, $patch->{url}, + $patch->{hash}, $patch->{size}, $patch->{narHash}, $patch->{narSize}, + $patch->{patchType}); + } + } + } + + # Remove cached information for removed manifests from the DB. 
+ foreach my $manifest (@{$dbh->selectcol_arrayref("select path from Manifests")}) { + next if defined $seen{$manifest}; + $dbh->do("delete from Manifests where path = ?", {}, $manifest); + } + + $dbh->commit; + + return $dbh; +} + + return 1; diff --git a/scripts/download-using-manifests.pl.in b/scripts/download-using-manifests.pl.in index 1d46de533d..775df4ce86 100644 --- a/scripts/download-using-manifests.pl.in +++ b/scripts/download-using-manifests.pl.in @@ -17,21 +17,8 @@ my $logFile = "@localstatedir@/log/nix/downloads"; my $fast = 1; -# Load all manifests. -my %narFiles; -my %patches; - -for my $manifest (glob "$manifestDir/*.nixmanifest") { - my $version = readManifest($manifest, \%narFiles, \%patches); - if ($version < 3) { - print STDERR "you have an old-style manifest `$manifest'; please delete it\n"; - exit 1; - } - if ($version >= 10) { - print STDERR "manifest `$manifest' is too new; please delete it or upgrade Nix\n"; - exit 1; - } -} +# Open the manifest cache and update it if necessary. +my $dbh = updateManifestDB(); sub isValidPath { @@ -110,7 +97,10 @@ sub computeSmallestDownload { else { # Add patch edges. - my $patchList = $patches{$u}; + my $patchList = $dbh->selectall_arrayref( + "select * from Patches where storePath = ?", + { Slice => {} }, $u); + foreach my $patch (@{$patchList}) { if (isValidPath($patch->{basePath})) { # !!! this should be cached @@ -129,11 +119,15 @@ sub computeSmallestDownload { } # Add NAR file edges to the start node. - my $narFileList = $narFiles{$u}; + my $narFileList = $dbh->selectall_arrayref( + "select * from NARs where storePath = ?", + { Slice => {} }, $u); + foreach my $narFile (@{$narFileList}) { # !!! how to handle files whose size is not known in advance? - # For now, assume some arbitrary size (1 MB). - addEdge \%graph, "start", $u, ($narFile->{size} || 1000000), "narfile", $narFile; + # For now, assume some arbitrary size (1 GB). + # This has the side-effect of preferring non-Hydra downloads. 
+ addEdge \%graph, "start", $u, ($narFile->{size} || 1000000000), "narfile", $narFile; } } } @@ -160,7 +154,7 @@ sub computeSmallestDownload { $v_->{d} = $u_->{d} + $edge->{weight}; # Store the edge; to edge->start is actually the # predecessor. - $v_->{pred} = $edge; + $v_->{pred} = $edge; } } } @@ -188,15 +182,21 @@ if ($ARGV[0] eq "--query") { if ($cmd eq "have") { my $storePath = ; chomp $storePath; - print STDOUT (defined $narFiles{$storePath} ? "1\n" : "0\n"); + print STDOUT ( + scalar @{$dbh->selectcol_arrayref("select 1 from NARs where storePath = ?", {}, $storePath)} > 0 + ? "1\n" : "0\n"); } elsif ($cmd eq "info") { my $storePath = ; chomp $storePath; + my $infos = $dbh->selectall_arrayref( + "select * from NARs where storePath = ?", + { Slice => {} }, $storePath); + my $info; - if (defined $narFiles{$storePath}) { - $info = @{$narFiles{$storePath}}[0]; + if (scalar @{$infos} > 0) { + $info = @{$infos}[0]; } else { print "0\n"; @@ -205,12 +205,12 @@ if ($ARGV[0] eq "--query") { print "1\n"; print "$info->{deriver}\n"; - my @references = split " ", $info->{references}; + my @references = split " ", $info->{refs}; print scalar @references, "\n"; print "$_\n" foreach @references; my @path = computeSmallestDownload $storePath; - + my $downloadSize = 0; while (scalar @path > 0) { my $edge = pop @path; @@ -223,7 +223,7 @@ if ($ARGV[0] eq "--query") { $downloadSize += $edge->{info}->{size} || 0; } } - + print "$downloadSize\n"; my $narSize = $info->{narSize} || 0;