From d329c3ea9dde17a665b32a1716d02eb13627826d Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Mon, 18 Jul 2011 23:31:03 +0000 Subject: [PATCH] =?UTF-8?q?*=20Support=20multiple=20outputs.=20=20A=20deri?= =?UTF-8?q?vation=20can=20declare=20multiple=20outputs=20=20=20by=20settin?= =?UTF-8?q?g=20the=20=E2=80=98outputs=E2=80=99=20attribute.=20=20For=20exa?= =?UTF-8?q?mple:?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit stdenv.mkDerivation { name = "aterm-2.5"; src = ...; outputs = [ "out" "tools" "dev" ]; configureFlags = "--bindir=$(tools)/bin --includedir=$(dev)/include"; } This derivation creates three outputs, named like this: /nix/store/gcnqgllbh01p3d448q8q6pzn2nc2gpyl-aterm-2.5 /nix/store/gjf1sgirwfnrlr0bdxyrwzpw2r304j02-aterm-2.5-tools /nix/store/hp6108bqfgxvza25nnxfs7kj88xi2vdx-aterm-2.5-dev That is, the symbolic name of the output is suffixed to the store path (except for the ‘out’ output). Each path is passed to the builder through the corresponding environment variable, e.g., ${tools}. The main reason for multiple outputs is to allow parts of a package to be distributed and garbage-collected separately. For instance, most packages depend on Glibc for its libraries, but don't need its header files. If these are separated into different store paths, then a package that depends on the Glibc libraries only causes the libraries and not the headers to be downloaded. The main problem with multiple outputs is that if one output exists while the others have been garbage-collected (or never downloaded in the first place), and we want to rebuild the other outputs, then this isn't possible because we can't clobber a valid output (it might be in active use). This currently gives an error message like: error: derivation `/nix/store/1s9zw4c8qydpjyrayxamx2z7zzp5pcgh-aterm-2.5.drv' is blocked by its output paths There are two solutions: 1) Do the build in a chroot. Then we don't need to overwrite the existing path. 
2) Use hash rewriting (see the ASE-2005 paper). Scary but it should work. This is not finished yet. There is not yet an easy way to refer to non-default outputs in Nix expressions. Also, mutually recursive outputs aren't detected yet and cause the garbage collector to crash. --- src/libexpr/primops.cc | 129 +++++++++++++++++++++++++++-------------- 1 file changed, 84 insertions(+), 45 deletions(-) diff --git a/src/libexpr/primops.cc b/src/libexpr/primops.cc index 3e955ea3fe..e58f9265f0 100644 --- a/src/libexpr/primops.cc +++ b/src/libexpr/primops.cc @@ -337,6 +337,9 @@ static void prim_derivationStrict(EvalState & state, Value * * args, Value & v) string outputHash, outputHashAlgo; bool outputHashRecursive = false; + StringSet outputs; + outputs.insert("out"); + foreach (Bindings::iterator, i, *args[0]->attrs) { string key = i->name; startNest(nest, lvlVomit, format("processing attribute `%1%'") % key); @@ -368,6 +371,24 @@ static void prim_derivationStrict(EvalState & state, Value * * args, Value & v) else if (s == "flat") outputHashRecursive = false; else throw EvalError(format("invalid value `%1%' for `outputHashMode' attribute") % s); } + else if (key == "outputs") { + Strings tmp = tokenizeString(s); + outputs.clear(); + foreach (Strings::iterator, j, tmp) { + if (outputs.find(*j) != outputs.end()) + throw EvalError(format("duplicate derivation output `%1%'") % *j); + /* !!! Check whether *j is a valid attribute + name. */ + /* Derivations cannot be named ‘drv’, because + then we'd have an attribute ‘drvPath’ in + the resulting set. 
*/ + if (*j == "drv") + throw EvalError(format("invalid derivation output name `drv'") % *j); + outputs.insert(*j); + } + if (outputs.empty()) + throw EvalError("derivation cannot have an empty set of outputs"); + } } } catch (Error & e) { @@ -424,54 +445,68 @@ static void prim_derivationStrict(EvalState & state, Value * * args, Value & v) if (drv.platform == "") throw EvalError("required attribute `system' missing"); - /* If an output hash was given, check it. */ - Path outPath; - if (outputHash == "") - outputHashAlgo = ""; - else { - HashType ht = parseHashType(outputHashAlgo); - if (ht == htUnknown) - throw EvalError(format("unknown hash algorithm `%1%'") % outputHashAlgo); - Hash h(ht); - if (outputHash.size() == h.hashSize * 2) - /* hexadecimal representation */ - h = parseHash(ht, outputHash); - else if (outputHash.size() == hashLength32(h)) - /* base-32 representation */ - h = parseHash32(ht, outputHash); - else - throw Error(format("hash `%1%' has wrong length for hash type `%2%'") - % outputHash % outputHashAlgo); - string s = outputHash; - outputHash = printHash(h); - outPath = makeFixedOutputPath(outputHashRecursive, ht, h, drvName); - if (outputHashRecursive) outputHashAlgo = "r:" + outputHashAlgo; - } - /* Check whether the derivation name is valid. */ checkStoreName(drvName); if (isDerivation(drvName)) throw EvalError(format("derivation names are not allowed to end in `%1%'") % drvExtension); - /* Construct the "masked" derivation store expression, which is - the final one except that in the list of outputs, the output - paths are empty, and the corresponding environment variables - have an empty value. This ensures that changes in the set of - output names do get reflected in the hash. 
*/ - drv.env["out"] = ""; - drv.outputs["out"] = DerivationOutput("", outputHashAlgo, outputHash); - + /* Construct the "masked" store derivation, which is the final one + except that in the list of outputs, the output paths are empty, + and the corresponding environment variables have an empty + value. This ensures that changes in the set of output names do + get reflected in the hash. + + However, for fixed-output derivations, we can compute the + output path directly, so we don't need this. */ + bool fixedOnly = true; + foreach (StringSet::iterator, i, outputs) { + if (*i != "out" || outputHash == "") { + drv.env[*i] = ""; + drv.outputs[*i] = DerivationOutput("", "", ""); + fixedOnly = false; + } else { + /* If an output hash was given, check it, and compute the + output path. */ + HashType ht = parseHashType(outputHashAlgo); + if (ht == htUnknown) + throw EvalError(format("unknown hash algorithm `%1%'") % outputHashAlgo); + Hash h(ht); + if (outputHash.size() == h.hashSize * 2) + /* hexadecimal representation */ + h = parseHash(ht, outputHash); + else if (outputHash.size() == hashLength32(h)) + /* base-32 representation */ + h = parseHash32(ht, outputHash); + else + throw Error(format("hash `%1%' has wrong length for hash type `%2%'") + % outputHash % outputHashAlgo); + string s = outputHash; + outputHash = printHash(h); + if (outputHashRecursive) outputHashAlgo = "r:" + outputHashAlgo; + Path outPath = makeFixedOutputPath(outputHashRecursive, ht, h, drvName); + drv.env[*i] = outPath; + drv.outputs[*i] = DerivationOutput(outPath, outputHashAlgo, outputHash); + } + } + /* Use the masked derivation expression to compute the output - path. */ - if (outPath == "") - outPath = makeStorePath("output:out", hashDerivationModulo(state, drv), drvName); - - /* Construct the final derivation store expression. */ - drv.env["out"] = outPath; - drv.outputs["out"] = - DerivationOutput(outPath, outputHashAlgo, outputHash); - + path. !!! 
Isn't it a potential security problem that the name + of each output path (including the suffix) isn't taken into + account? For instance, changing the suffix for one path + (‘i->first == "out" ...’) doesn't affect the hash of the + others. Is that exploitable? */ + if (!fixedOnly) { + Hash h = hashDerivationModulo(state, drv); + foreach (DerivationOutputs::iterator, i, drv.outputs) + if (i->second.path == "") { + Path outPath = makeStorePath("output:" + i->first, h, + drvName + (i->first == "out" ? "" : "-" + i->first)); + drv.env[i->first] = outPath; + i->second.path = outPath; + } + } + /* Write the resulting term into the Nix store directory. */ Path drvPath = writeDerivation(drv, drvName); @@ -479,14 +514,18 @@ static void prim_derivationStrict(EvalState & state, Value * * args, Value & v) % drvName % drvPath); /* Optimisation, but required in read-only mode! because in that - case we don't actually write store expressions, so we can't + case we don't actually write store derivations, so we can't read them later. */ state.drvHashes[drvPath] = hashDerivationModulo(state, drv); - /* !!! assumes a single output */ - state.mkAttrs(v, 2); - mkString(*state.allocAttr(v, state.sOutPath), outPath, singleton(drvPath)); + state.mkAttrs(v, 1 + drv.outputs.size()); mkString(*state.allocAttr(v, state.sDrvPath), drvPath, singleton("=" + drvPath)); + foreach (DerivationOutputs::iterator, i, drv.outputs) { + /* The output path of an output X is ‘XPath’, + e.g. ‘outPath’. */ + mkString(*state.allocAttr(v, state.symbols.create(i->first + "Path")), + i->second.path, singleton(drvPath)); + } v.attrs->sort(); }