From 3d38a498404bf842ca479d42d18def1f472a6fb0 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Wed, 17 Nov 2010 17:32:25 +0000 Subject: [PATCH] * In the download size indication, take binary patches into account. Hopefully this doesn't slow things down too much. --- scripts/download-using-manifests.pl.in | 305 +++++++++++++------------ tests/binary-patching.nix | 2 +- tests/binary-patching.sh | 6 + 3 files changed, 168 insertions(+), 145 deletions(-) diff --git a/scripts/download-using-manifests.pl.in b/scripts/download-using-manifests.pl.in index 7b3f7ee665..7147090c4b 100644 --- a/scripts/download-using-manifests.pl.in +++ b/scripts/download-using-manifests.pl.in @@ -31,6 +31,144 @@ for my $manifest (glob "$manifestDir/*.nixmanifest") { } +sub isValidPath { + my $p = shift; + return system("$binDir/nix-store --check-validity '$p' 2> /dev/null") == 0; +} + + +sub parseHash { + my $hash = shift; + if ($hash =~ /^(.+):(.+)$/) { + return ($1, $2); + } else { + return ("md5", $hash); + } +} + + +# Compute the most efficient sequence of downloads to produce the +# given path. +sub computeSmallestDownload { + my $targetPath = shift; + + # Build a graph of all store paths that might contribute to the + # construction of $targetPath, and the special node "start". The + # edges are either patch operations, or downloads of full NAR + # files. The latter edges only occur between "start" and a store + # path. + my %graph; + + $graph{"start"} = {d => 0, pred => undef, edges => []}; + + my @queue = (); + my $queueFront = 0; + my %done; + + sub addNode { + my $graph = shift; + my $u = shift; + $$graph{$u} = {d => 999999999999, pred => undef, edges => []} + unless defined $$graph{$u}; + } + + sub addEdge { + my $graph = shift; + my $u = shift; + my $v = shift; + my $w = shift; + my $type = shift; + my $info = shift; + addNode $graph, $u; + push @{$$graph{$u}->{edges}}, + {weight => $w, start => $u, end => $v, type => $type, info => $info}; + my $n = scalar @{$$graph{$u}->{edges}}; + } + + push @queue, $targetPath; + + while ($queueFront < scalar @queue) { + my $u = $queue[$queueFront++]; + return if defined $done{$u}; + $done{$u} = 1; + + addNode \%graph, $u; + + # If the path already exists, it has distance 0 from the + # "start" node. + if (isValidPath($u)) { + addEdge \%graph, "start", $u, 0, "present", undef; + } + + else { + + # Add patch edges. + my $patchList = $patches{$u}; + foreach my $patch (@{$patchList}) { + if (isValidPath($patch->{basePath})) { + # !!! this should be cached + my ($baseHashAlgo, $baseHash) = parseHash $patch->{baseHash}; + my $format = "--base32"; + $format = "" if $baseHashAlgo eq "md5"; + my $hash = `$binDir/nix-hash --type '$baseHashAlgo' $format "$patch->{basePath}"`; + chomp $hash; + next if $hash ne $baseHash; + } + push @queue, $patch->{basePath}; + addEdge \%graph, $patch->{basePath}, $u, $patch->{size}, "patch", $patch; + } + + # Add NAR file edges to the start node. + my $narFileList = $narFiles{$u}; + foreach my $narFile (@{$narFileList}) { + # !!! how to handle files whose size is not known in advance? + # For now, assume some arbitrary size (1 MB). + addEdge \%graph, "start", $u, ($narFile->{size} || 1000000), "narfile", $narFile; + } + } + } + + + # Run Dijkstra's shortest path algorithm to determine the shortest + # sequence of download and/or patch actions that will produce + # $targetPath. + + my @todo = keys %graph; + + while (scalar @todo > 0) { + + # Remove the closest element from the todo list. + # !!! inefficient, use a priority queue + @todo = sort { -($graph{$a}->{d} <=> $graph{$b}->{d}) } @todo; + my $u = pop @todo; + + my $u_ = $graph{$u}; + + foreach my $edge (@{$u_->{edges}}) { + my $v_ = $graph{$edge->{end}}; + if ($v_->{d} > $u_->{d} + $edge->{weight}) { + $v_->{d} = $u_->{d} + $edge->{weight}; + # Store the edge; to edge->start is actually the + # predecessor. + $v_->{pred} = $edge; + } + } + } + + + # Retrieve the shortest path from "start" to $targetPath. + my @path = (); + my $cur = $targetPath; + return () unless defined $graph{$targetPath}->{pred}; + while ($cur ne "start") { + push @path, $graph{$cur}->{pred}; + $cur = $graph{$cur}->{pred}->{start}; + } + + return @path; +} + + # Parse the arguments. if ($ARGV[0] eq "--query") { @@ -46,6 +184,7 @@ if ($ARGV[0] eq "--query") { elsif ($cmd eq "info") { my $storePath = ; chomp $storePath; + my $info; if (defined $narFiles{$storePath}) { $info = @{$narFiles{$storePath}}[0]; @@ -57,13 +196,30 @@ if ($ARGV[0] eq "--query") { print "0\n"; next; # not an error } + print "1\n"; print "$info->{deriver}\n"; my @references = split " ", $info->{references}; print scalar @references, "\n"; print "$_\n" foreach @references; - my $size = $info->{size} || 0; - print "$size\n"; + + my @path = computeSmallestDownload $storePath; + + my $downloadSize = 0; + while (scalar @path > 0) { + my $edge = pop @path; + my $u = $edge->{start}; + my $v = $edge->{end}; + if ($edge->{type} eq "patch") { + $downloadSize += $edge->{info}->{size}; + } + elsif ($edge->{type} eq "narfile") { + $downloadSize += $edge->{info}->{size}; + } + } + + print "$downloadSize\n"; + my $narSize = $info->{narSize} || 0; print "$narSize\n"; } @@ -112,148 +268,9 @@ foreach my $localPath (@{$localPathList}) { } -# Build a graph of all store paths that might contribute to the -# construction of $targetPath, and the special node "start". The -# edges are either patch operations, or downloads of full NAR files. -# The latter edges only occur between "start" and a store path. - -my %graph; - -$graph{"start"} = {d => 0, pred => undef, edges => []}; - -my @queue = (); -my $queueFront = 0; -my %done; - -sub addToQueue { - my $v = shift; - return if defined $done{$v}; - $done{$v} = 1; - push @queue, $v; -} - -sub addNode { - my $u = shift; - $graph{$u} = {d => 999999999999, pred => undef, edges => []} - unless defined $graph{$u}; -} - -sub addEdge { - my $u = shift; - my $v = shift; - my $w = shift; - my $type = shift; - my $info = shift; - addNode $u; - push @{$graph{$u}->{edges}}, - {weight => $w, start => $u, end => $v, type => $type, info => $info}; - my $n = scalar @{$graph{$u}->{edges}}; -} - -addToQueue $targetPath; - -sub isValidPath { - my $p = shift; - return system("$binDir/nix-store --check-validity '$p' 2> /dev/null") == 0; -} - -sub parseHash { - my $hash = shift; - if ($hash =~ /^(.+):(.+)$/) { - return ($1, $2); - } else { - return ("md5", $hash); - } -} - -while ($queueFront < scalar @queue) { - my $u = $queue[$queueFront++]; -# print "$u\n"; - - addNode $u; - - # If the path already exists, it has distance 0 from the "start" - # node. - if (isValidPath($u)) { - addEdge "start", $u, 0, "present", undef; - } - - else { - - # Add patch edges. - my $patchList = $patches{$u}; - foreach my $patch (@{$patchList}) { - if (isValidPath($patch->{basePath})) { - # !!! this should be cached - my ($baseHashAlgo, $baseHash) = parseHash $patch->{baseHash}; - my $format = "--base32"; - $format = "" if $baseHashAlgo eq "md5"; - my $hash = `$binDir/nix-hash --type '$baseHashAlgo' $format "$patch->{basePath}"`; - chomp $hash; - if ($hash ne $baseHash) { - print LOGFILE "$$ rejecting $patch->{basePath}\n"; - next; - } - } - addToQueue $patch->{basePath}; - addEdge $patch->{basePath}, $u, $patch->{size}, "patch", $patch; - } - - # Add NAR file edges to the start node. - my $narFileList = $narFiles{$u}; - foreach my $narFile (@{$narFileList}) { - # !!! how to handle files whose size is not known in advance? - # For now, assume some arbitrary size (1 MB). - addEdge "start", $u, ($narFile->{size} || 1000000), "narfile", $narFile; - if ($u eq $targetPath) { - my $size = $narFile->{size} || -1; - print LOGFILE "$$ full-download-would-be $size\n"; - } - } - - } -} - - -# Run Dijkstra's shortest path algorithm to determine the shortest -# sequence of download and/or patch actions that will produce -# $targetPath. - -sub byDistance { # sort by distance, reversed - return -($graph{$a}->{d} <=> $graph{$b}->{d}); -} - -my @todo = keys %graph; - -while (scalar @todo > 0) { - - # Remove the closest element from the todo list. - @todo = sort byDistance @todo; - my $u = pop @todo; - - my $u_ = $graph{$u}; - - foreach my $edge (@{$u_->{edges}}) { - my $v_ = $graph{$edge->{end}}; - if ($v_->{d} > $u_->{d} + $edge->{weight}) { - $v_->{d} = $u_->{d} + $edge->{weight}; - # Store the edge; to edge->start is actually the - # predecessor. - $v_->{pred} = $edge; - } - } -} - - -# Retrieve the shortest path from "start" to $targetPath. -my @path = (); -my $cur = $targetPath; -die "don't know how to produce $targetPath\n" - unless defined $graph{$targetPath}->{pred}; -while ($cur ne "start") { - push @path, $graph{$cur}->{pred}; - $cur = $graph{$cur}->{pred}->{start}; -} +# Compute the shortest path. +my @path = computeSmallestDownload $targetPath; +die "don't know how to produce $targetPath\n" if scalar @path == 0; # Traverse the shortest path, perform the actions described by the diff --git a/tests/binary-patching.nix b/tests/binary-patching.nix index 781bd76eba..afa0a0fb3f 100644 --- a/tests/binary-patching.nix +++ b/tests/binary-patching.nix @@ -9,7 +9,7 @@ mkDerivation { mkdir $out seq 1 1000000 > $out/foo ${if version == 2 then '' - echo bla >> $out/foo + seq 1000000 1010000 >> $out/foo '' else ""} ''; } diff --git a/tests/binary-patching.sh b/tests/binary-patching.sh index 8d7788fb61..26a499727f 100644 --- a/tests/binary-patching.sh +++ b/tests/binary-patching.sh @@ -1,5 +1,7 @@ source common.sh +clearManifests + mkdir -p $TEST_ROOT/cache2 $TEST_ROOT/patches RESULT=$TEST_ROOT/result @@ -29,5 +31,9 @@ $NIX_BIN_DIR/nix-pull file://$TEST_ROOT/manifest2 # To make sure that we're using the patch, delete the full NARs. rm -f $TEST_ROOT/cache2/* +# Make sure that the download size prediction uses the patch rather +# than the full download. +$nixbuild -o $RESULT binary-patching.nix --arg version 2 --dry-run 2>&1 | grep -q "0.01 MiB" + # Now rebuild it. This should use the patch generated above. $nixbuild -o $RESULT binary-patching.nix --arg version 2