From dbd4d2d0707b486f1e2c8659e94e1d3b15e4351e Mon Sep 17 00:00:00 2001 From: Pierre Langlois Date: Fri, 25 Nov 2022 01:57:21 +0000 Subject: [PATCH] build-system: Add tree-sitter-build-system. * guix/build-system/tree-sitter.scm: New module. * guix/build/tree-sitter-build-system.scm: Likewise. * Makefile.am (MODULES): Add them. * doc/guix.texi: Document it. Signed-off-by: Andrew Tropin --- Makefile.am | 2 + doc/guix.texi | 21 ++- guix/build-system/tree-sitter.scm | 195 ++++++++++++++++++++++++ guix/build/tree-sitter-build-system.scm | 153 +++++++++++++++++++ 4 files changed, 370 insertions(+), 1 deletion(-) create mode 100644 guix/build-system/tree-sitter.scm create mode 100644 guix/build/tree-sitter-build-system.scm diff --git a/Makefile.am b/Makefile.am index a4b6f03b3a..5ce6cc84f4 100644 --- a/Makefile.am +++ b/Makefile.am @@ -178,6 +178,7 @@ MODULES = \ guix/build-system/ruby.scm \ guix/build-system/scons.scm \ guix/build-system/texlive.scm \ + guix/build-system/tree-sitter.scm \ guix/build-system/trivial.scm \ guix/ftp-client.scm \ guix/http-client.scm \ @@ -234,6 +235,7 @@ MODULES = \ guix/build/ruby-build-system.scm \ guix/build/scons-build-system.scm \ guix/build/texlive-build-system.scm \ + guix/build/tree-sitter-build-system.scm \ guix/build/waf-build-system.scm \ guix/build/haskell-build-system.scm \ guix/build/julia-build-system.scm \ diff --git a/doc/guix.texi b/doc/guix.texi index 6c7c918eb0..44e2165a82 100644 --- a/doc/guix.texi +++ b/doc/guix.texi @@ -79,7 +79,7 @@ Copyright @copyright{} 2020 Jack Hill@* Copyright @copyright{} 2020 Naga Malleswari@* Copyright @copyright{} 2020, 2021 Brice Waegeneire@* Copyright @copyright{} 2020 R Veera Kumar@* -Copyright @copyright{} 2020, 2021 Pierre Langlois@* +Copyright @copyright{} 2020, 2021, 2022 Pierre Langlois@* Copyright @copyright{} 2020 pinoaffe@* Copyright @copyright{} 2020 André Batista@* Copyright @copyright{} 2020, 2021 Alexandru-Sergiu Marton@* @@ -9756,6 +9756,25 @@ be specified with the @code{#:node} parameter which defaults to @code{node}. @end defvar +@defvr {Scheme Variable} tree-sitter-build-system + +This variable is exported by @code{(guix build-system tree-sitter)}. It +implements procedures to compile grammars for the +@url{https://tree-sitter.github.io/tree-sitter/, Tree-sitter} parsing +library. It essentially runs @code{tree-sitter generate} to translate +@code{grammar.js} grammars to JSON and then to C. Which it then +compiles to native code. + +Tree-sitter packages may support multiple grammars, so this build system +supports a @code{#:grammar-directories} keyword to specify a list of +locations where a @code{grammar.js} file may be found. + +Grammars sometimes depend on each other, such as C++ depending on C and +TypeScript depending on JavaScript. You may use inputs to declare such +dependencies. + +@end defvr + Lastly, for packages that do not need anything as sophisticated, a ``trivial'' build system is provided. It is trivial in the sense that it provides basically no support: it does not pull any implicit inputs, diff --git a/guix/build-system/tree-sitter.scm b/guix/build-system/tree-sitter.scm new file mode 100644 index 0000000000..21c4eb35b2 --- /dev/null +++ b/guix/build-system/tree-sitter.scm @@ -0,0 +1,195 @@ +;;; GNU Guix --- Functional package management for GNU +;;; Copyright © 2022 Pierre Langlois +;;; +;;; This file is part of GNU Guix. +;;; +;;; GNU Guix is free software; you can redistribute it and/or modify it +;;; under the terms of the GNU General Public License as published by +;;; the Free Software Foundation; either version 3 of the License, or (at +;;; your option) any later version. +;;; +;;; GNU Guix is distributed in the hope that it will be useful, but +;;; WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;;; GNU General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with GNU Guix. If not, see . + +(define-module (guix build-system tree-sitter) + #:use-module (guix store) + #:use-module (guix utils) + #:use-module (guix packages) + #:use-module (guix gexp) + #:use-module (guix monads) + #:use-module (guix search-paths) + #:use-module (guix build-system) + #:use-module (guix build-system gnu) + #:use-module (guix build-system node) + #:use-module (ice-9 match) + #:export (%tree-sitter-build-system-modules + tree-sitter-build + tree-sitter-build-system)) + +(define %tree-sitter-build-system-modules + ;; Build-side modules imported by default. + `((guix build tree-sitter-build-system) + ,@%node-build-system-modules)) + +(define* (lower name + #:key source inputs native-inputs outputs system target + #:allow-other-keys + #:rest arguments) + "Return a bag for NAME from the given arguments." + (define private-keywords + `(#:inputs #:native-inputs #:outputs ,@(if target + '() + '(#:target)))) + (define node + (module-ref (resolve-interface '(gnu packages node)) + 'node-lts)) + (define tree-sitter + (module-ref (resolve-interface '(gnu packages tree-sitter)) + 'tree-sitter)) + (define tree-sitter-cli + (module-ref (resolve-interface '(gnu packages tree-sitter)) + 'tree-sitter-cli)) + ;; Grammars depend on each other via JS modules, which we package into a + ;; dedicated js output. + (define grammar-inputs + (map (match-lambda + ((name package) + `(,name ,package "js"))) + inputs)) + (bag + (name name) + (system system) (target target) + (build-inputs `(,@(if source + `(("source" ,source)) + '()) + ("node" ,node) + ("tree-sitter-cli" ,tree-sitter-cli) + ,@native-inputs + ,@(if target '() grammar-inputs) + ;; Keep the standard inputs of 'gnu-build-system'. + ,@(if target + (standard-cross-packages target 'host) + '()) + ,@(standard-packages))) + (host-inputs `(("tree-sitter" ,tree-sitter) + ,@(if target grammar-inputs '()))) + ;; Keep the standard inputs of 'gnu-buid-system'. + (target-inputs (if target + (standard-cross-packages target 'target) + '())) + ;; XXX: this is a hack to get around issue #41569. + (outputs (match outputs + (("out") (cons "js" outputs)) + (_ outputs))) + (build (if target tree-sitter-cross-build tree-sitter-build)) + (arguments (strip-keyword-arguments private-keywords arguments)))) + +(define* (tree-sitter-build name inputs + #:key + source + (phases '%standard-phases) + (grammar-directories '(".")) + (tests? #t) + (outputs '("out" "js")) + (search-paths '()) + (system (%current-system)) + (guile #f) + (imported-modules %tree-sitter-build-system-modules) + (modules '((guix build utils) + (guix build tree-sitter-build-system)))) + (define builder + (with-imported-modules imported-modules + #~(begin + (use-modules #$@(sexp->gexp modules)) + (tree-sitter-build #:name #$name + #:source #+source + #:system #$system + #:phases #$phases + #:tests? #$tests? + #:grammar-directories '#$grammar-directories + #:outputs #$(outputs->gexp outputs) + #:search-paths + '#$(sexp->gexp + (map search-path-specification->sexp + search-paths)) + #:inputs #$(input-tuples->gexp inputs))))) + + (mlet %store-monad ((guile (package->derivation (or guile (default-guile)) + system #:graft? #f))) + (gexp->derivation name builder + #:system system + #:guile-for-build guile))) + +(define* (tree-sitter-cross-build name + #:key + target + build-inputs target-inputs host-inputs + guile source + (phases '%standard-phases) + (grammar-directories '(".")) + (tests? #t) + (outputs '("out" "js")) + (search-paths '()) + (native-search-paths '()) + (system (%current-system)) + (build (nix-system->gnu-triplet system)) + (imported-modules + %tree-sitter-build-system-modules) + (modules + '((guix build utils) + (guix build tree-sitter-build-system)))) + (define builder + (with-imported-modules imported-modules + #~(begin + (use-modules #$@(sexp->gexp modules)) + + (define %build-host-inputs + #+(input-tuples->gexp build-inputs)) + + (define %build-target-inputs + (append #$(input-tuples->gexp host-inputs) + #+(input-tuples->gexp target-inputs))) + + (define %build-inputs + (append %build-host-inputs %build-target-inputs)) + + (tree-sitter-build #:name #$name + #:source #+source + #:system #$system + #:build #$build + #:target #$target + #:phases #$phases + #:tests? #$tests? + #:grammar-directories '#$grammar-directories + #:outputs #$(outputs->gexp outputs) + #:inputs %build-target-inputs + #:native-inputs %build-host-inputs + #:search-paths ' + #$(sexp->gexp + (map search-path-specification->sexp + search-paths)) + #:native-search-paths + '#$(sexp->gexp + (map + search-path-specification->sexp + native-search-paths)))))) + + (mlet %store-monad ((guile (package->derivation (or guile (default-guile)) + system #:graft? #f))) + (gexp->derivation name builder + #:system system + #:target target + #:guile-for-build guile))) + +(define tree-sitter-build-system + (build-system + (name 'tree-sitter) + (description "The Tree-sitter grammar build system") + (lower lower))) + +;;; tree-sitter.scm ends here diff --git a/guix/build/tree-sitter-build-system.scm b/guix/build/tree-sitter-build-system.scm new file mode 100644 index 0000000000..4106728bdf --- /dev/null +++ b/guix/build/tree-sitter-build-system.scm @@ -0,0 +1,153 @@ +;;; GNU Guix --- Functional package management for GNU +;;; Copyright © 2022 Pierre Langlois +;;; +;;; This file is part of GNU Guix. +;;; +;;; GNU Guix is free software; you can redistribute it and/or modify it +;;; under the terms of the GNU General Public License as published by +;;; the Free Software Foundation; either version 3 of the License, or (at +;;; your option) any later version. +;;; +;;; GNU Guix is distributed in the hope that it will be useful, but +;;; WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;;; GNU General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with GNU Guix. If not, see . + +(define-module (guix build tree-sitter-build-system) + #:use-module ((guix build node-build-system) #:prefix node:) + #:use-module (guix build json) + #:use-module (guix build utils) + #:use-module (ice-9 match) + #:use-module (ice-9 regex) + #:use-module (srfi srfi-1) + #:export (%standard-phases + tree-sitter-build)) + +;; Commentary: +;; +;; Build procedures for tree-sitter grammar packages. This is the +;; builder-side code, which builds on top of the node build-system. +;; +;; Tree-sitter grammars are written in JavaScript and compiled to a native +;; shared object. The `tree-sitter generate' command invokes `node' in order +;; to evaluate the grammar.js into a grammar.json file, which is then +;; translated into C code. We then compile the C code ourselves. Packages +;; also sometimes add extra manually written C/C++ code. +;; +;; In order to support grammars depending on each other, such as C and C++, +;; JavaScript and TypeScript, this build-system installs the source of the +;; node module in a dedicated "js" output. +;; +;; Code: + +(define* (patch-dependencies #:key inputs #:allow-other-keys) + "Rewrite dependencies in 'package.json'. We remove all runtime dependencies +and replace development dependencies with tree-sitter grammar node modules." + + (define (rewrite package.json) + (map (match-lambda + (("dependencies" @ . _) + '("dependencies" @)) + (("devDependencies" @ . _) + `("devDependencies" @ + ,@(filter-map (match-lambda + ((key . directory) + (let ((node-module + (string-append directory + "/lib/node_modules/" + key))) + (and (directory-exists? node-module) + `(,key . ,node-module))))) + (alist-delete "node" inputs)))) + (other other)) + package.json)) + + (node:with-atomic-json-file-replacement "package.json" + (match-lambda + (('@ . package.json) + (cons '@ (rewrite package.json)))))) + +;; FIXME: The node build-system's configure phase does not support +;; cross-compiling so we re-define it. +(define* (configure #:key native-inputs inputs #:allow-other-keys) + (invoke (search-input-file (or native-inputs inputs) "/bin/npm") + "--offline" "--ignore-scripts" "install")) + +(define* (build #:key grammar-directories #:allow-other-keys) + (for-each (lambda (dir) + (with-directory-excursion dir + ;; Avoid generating binding code for other languages, we do + ;; not support this use-case yet and it relies on running + ;; `node-gyp' to build native addons. + (invoke "tree-sitter" "generate" "--no-bindings"))) + grammar-directories)) + +(define* (check #:key grammar-directories tests? #:allow-other-keys) + (when tests? + (for-each (lambda (dir) + (with-directory-excursion dir + (invoke "tree-sitter" "test"))) + grammar-directories))) + +(define* (install #:key target grammar-directories outputs #:allow-other-keys) + (let ((lib (string-append (assoc-ref outputs "out") + "/lib/tree-sitter"))) + (mkdir-p lib) + (define (compile-language dir) + (with-directory-excursion dir + (let ((lang (assoc-ref (call-with-input-file "src/grammar.json" + read-json) + "name")) + (source-file (lambda (path) + (if (file-exists? path) + path + #f)))) + (apply invoke + `(,(if target + (string-append target "-g++") + "g++") + "-shared" + "-fPIC" + "-fno-exceptions" + "-O2" + "-g" + "-o" ,(string-append lib "/libtree-sitter-" lang ".so") + ;; An additional `scanner.{c,cc}' file is sometimes + ;; provided. + ,@(cond + ((source-file "src/scanner.c") + => (lambda (file) (list "-xc" "-std=c99" file))) + ((source-file "src/scanner.cc") + => (lambda (file) (list file))) + (else '())) + "-xc" "src/parser.c"))))) + (for-each compile-language grammar-directories))) + +(define* (install-js #:key native-inputs inputs outputs #:allow-other-keys) + (invoke (search-input-file (or native-inputs inputs) "/bin/npm") + "--prefix" (assoc-ref outputs "js") + "--global" + "--offline" + "--loglevel" "info" + "--production" + ;; Skip scripts to prevent building bindings via GYP. + "--ignore-scripts" + "install" "../package.tgz")) + +(define %standard-phases + (modify-phases node:%standard-phases + (replace 'patch-dependencies patch-dependencies) + (replace 'configure configure) + (replace 'build build) + (replace 'check check) + (replace 'install install) + (add-after 'install 'install-js install-js))) + +(define* (tree-sitter-build #:key inputs (phases %standard-phases) + #:allow-other-keys #:rest args) + (apply node:node-build #:inputs inputs #:phases phases args)) + +;;; tree-sitter-build-system.scm ends here