9488ae7357
A utility to show the amount of package duplication present in (e.g.) a profile. It shows the number of instances of each package in a closure, along with the size in bytes of each instance as well as the "waste" (the difference between the sum of the sizes of all instances and the average size).

$ ./show-duplication.pl /nix/var/nix/profiles/default
gcc 11
  3.3.6 19293318
  3.4.4 21425257
  ...
  average 14942970, waste 149429707
coreutils 6
  ...
average package duplication 1.87628865979381, total size 3486330471, total waste 1335324237, 38.3017114443825% wasted

This utility is useful for measuring the cost in terms of disk space of the Nix approach.
73 lines
1.9 KiB
Prolog
Executable file
73 lines
1.9 KiB
Prolog
Executable file
#! /usr/bin/perl -w
|
|
|
|
# Exactly one argument is expected: the path whose closure is inspected.
unless (@ARGV == 1) {
    print "syntax: show-duplication.pl PATH\n";
    exit 1;
}

my ($root) = @ARGV;

# Pattern for the name part of "name-version": alphanumerics, `+' and
# `_', plus any dash that is not immediately followed by a digit.
my $nameRE = "(?:(?:[A-Za-z0-9\+\_]|(?:-[^0-9]))+)";

# Pattern for the version part: alphanumerics, dots and dashes.
my $versionRE = "(?:[A-Za-z0-9\.\-]+)";

# Maps a package name to a list of {version, path} records, one per
# store path carrying that name.
my %pkgInstances;
|
|
|
|
|
|
# Ask nix-store for the closure of $root (one store path per line) and
# group the paths by package name in %pkgInstances.
#
# The list form of the piped open runs nix-store directly, so no
# hand-rolled fork/exec is needed and a failure to start the command is
# reported in this process instead of falling through to exec.
open(my $paths, "-|", "nix-store", "-qR", $root)
    or die "cannot run nix-store: $!";

while (my $path = <$paths>) {
    chomp $path;

    # Drop the directory and the hash prefix, leaving "name[-version]".
    # A line that does not have that shape is skipped; reusing the
    # captures of an earlier match would file it under the wrong package.
    my ($nameVersion) = $path =~ /^.*\/[0-9a-z]*-(.*)$/;
    unless (defined $nameVersion) {
        warn "skipping unrecognised path `$path'\n";
        next;
    }

    # Split into name and (optional) version.  When the string cannot be
    # split, the whole string is used as the name.
    my ($name, $version);
    if ($nameVersion =~ /^($nameRE)(-($versionRE))?$/) {
        ($name, $version) = ($1, $3);
    } else {
        $name = $nameVersion;
    }
    $version = "(unnumbered)" unless defined $version;

    push @{$pkgInstances{$name}}, {version => $version, path => $path};
}

# close() on a piped handle fails when the command exited non-zero.
close $paths or exit 1;
|
|
|
|
|
|
# pathSize PATH
#
# Returns the size in bytes of PATH as reported by lstat, plus, when
# PATH is a directory, the sizes of everything beneath it (recursively).
# Symbolic links are never followed: a link contributes only its own
# lstat size, even when it points to a directory.
#
# Dies if PATH cannot be lstat'ed or a directory cannot be opened.
sub pathSize {
    my $path = shift;

    my @st = lstat($path) or die "cannot lstat `$path': $!";

    # `_' reuses the lstat buffer filled just above, so this test looks
    # at the link itself rather than at its target (a plain `-d $path'
    # would stat again and follow the link).
    my $isDir = -d _;

    my $size = $st[7];
    return $size unless $isDir;

    # A lexical handle keeps every recursion level independent.
    opendir(my $dh, $path) or die "cannot open directory `$path': $!";
    my @entries = grep { $_ ne "." && $_ ne ".." } readdir $dh;
    closedir $dh;

    foreach my $entry (@entries) {
        $size += pathSize("$path/$entry");
    }

    return $size;
}
|
|
|
|
|
|
# Print, for every package, its instances with their sizes, followed by
# the per-package average and waste, and finally the overall summary.
#
# Each counter gets its own `my': in `my $a = 0, $b = 0;' only the first
# variable is declared, the second silently becomes a package global.
my $totalPaths = 0;
my $totalSize = 0;
my $totalWaste = 0;

# Packages with the most instances come first; ties are broken by name
# so that the output does not depend on hash ordering.
my @names = sort {
    scalar @{$pkgInstances{$b}} <=> scalar @{$pkgInstances{$a}}
        or $a cmp $b
} keys %pkgInstances;

foreach my $name (@names) {
    my @instances = @{$pkgInstances{$name}};
    print "$name ", scalar @instances, "\n";

    my $allSize = 0;
    foreach my $x (sort {$a->{version} cmp $b->{version}} @instances) {
        $totalPaths++;
        my $size = pathSize($x->{path});
        $allSize += $size;
        print " $x->{version} $size\n";
    }

    # Waste is what all instances occupy beyond one average-sized copy.
    my $avgSize = int($allSize / scalar @instances);
    my $waste = $allSize - $avgSize;
    $totalSize += $allSize;
    $totalWaste += $waste;
    print " average $avgSize, waste $waste\n";
}

# Guard both ratios: an empty closure or a total size of zero would
# otherwise abort the script with a division by zero.
my $numPackages = scalar keys %pkgInstances;
my $avgDupl = $numPackages ? $totalPaths / $numPackages : 0;
my $wasteFactor = $totalSize ? ($totalWaste / $totalSize) * 100 : 0;
print "average package duplication $avgDupl, total size $totalSize, total waste $totalWaste, $wasteFactor% wasted\n";
|