bfe3c68572
* gnu/packages/bioinformatics.scm (clustal-omega): New variable.
1042 lines
42 KiB
Scheme
1042 lines
42 KiB
Scheme
;;; GNU Guix --- Functional package management for GNU
|
|
;;; Copyright © 2014, 2015 Ricardo Wurmus <rekado@elephly.net>
|
|
;;;
|
|
;;; This file is part of GNU Guix.
|
|
;;;
|
|
;;; GNU Guix is free software; you can redistribute it and/or modify it
|
|
;;; under the terms of the GNU General Public License as published by
|
|
;;; the Free Software Foundation; either version 3 of the License, or (at
|
|
;;; your option) any later version.
|
|
;;;
|
|
;;; GNU Guix is distributed in the hope that it will be useful, but
|
|
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
;;; GNU General Public License for more details.
|
|
;;;
|
|
;;; You should have received a copy of the GNU General Public License
|
|
;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
(define-module (gnu packages bioinformatics)
|
|
#:use-module ((guix licenses) #:prefix license:)
|
|
#:use-module (guix packages)
|
|
#:use-module (guix utils)
|
|
#:use-module (guix download)
|
|
#:use-module (guix git-download)
|
|
#:use-module (guix build-system gnu)
|
|
#:use-module (guix build-system cmake)
|
|
#:use-module (guix build-system python)
|
|
#:use-module (guix build-system trivial)
|
|
#:use-module (gnu packages)
|
|
#:use-module (gnu packages base)
|
|
#:use-module (gnu packages compression)
|
|
#:use-module (gnu packages java)
|
|
#:use-module (gnu packages ncurses)
|
|
#:use-module (gnu packages perl)
|
|
#:use-module (gnu packages pkg-config)
|
|
#:use-module (gnu packages popt)
|
|
#:use-module (gnu packages python)
|
|
#:use-module (gnu packages tbb)
|
|
#:use-module (gnu packages vim)
|
|
#:use-module (gnu packages zip))
|
|
|
|
;; BEDOPS: command-line tools for set operations and statistics on genomic
;; features.  The package builds the bundled copies of jansson, zlib and
;; bzip2 (see the FIXME in the 'unpack-tarballs phase).
(define-public bedops
  (package
    (name "bedops")
    (version "2.4.5")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/bedops/bedops/archive/v"
                                  version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "0wmg6j0icimlrnsidaxrzf3hfgjvlkkcwvpdg7n4gg7hdv2m9ni5"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f
       #:make-flags (list (string-append "BINDIR=" %output "/bin"))
       #:phases
       (alist-cons-after
        'unpack 'unpack-tarballs
        (lambda _
          ;; FIXME: Bedops includes tarballs of minimally patched upstream
          ;; libraries jansson, zlib, and bzip2.  We cannot just use stock
          ;; libraries because at least one of the libraries (zlib) is
          ;; patched to add a C++ function definition (deflateInit2cpp).
          ;; Until the Bedops developers offer a way to link against system
          ;; libraries we have to build the in-tree copies of these three
          ;; libraries.

          ;; See upstream discussion:
          ;; https://github.com/bedops/bedops/issues/124

          ;; The phase result is the conjunction of all steps, so that a
          ;; failing "tar" invocation makes the phase fail instead of being
          ;; silently ignored.
          (and
           ;; Unpack the tarballs to benefit from shebang patching.
           (with-directory-excursion "third-party"
             (and (zero? (system* "tar" "xvf" "jansson-2.6.tar.bz2"))
                  (zero? (system* "tar" "xvf" "zlib-1.2.7.tar.bz2"))
                  (zero? (system* "tar" "xvf" "bzip2-1.0.6.tar.bz2"))))
           (begin
             ;; Disable unpacking of tarballs in Makefile.
             (substitute* "system.mk/Makefile.linux"
               (("^\tbzcat .*") "\t@echo \"not unpacking\"\n")
               (("\\./configure") "CONFIG_SHELL=bash ./configure"))
             (substitute* "third-party/zlib-1.2.7/Makefile.in"
               (("^SHELL=.*$") "SHELL=bash\n"))
             #t)))
        ;; There is no 'configure phase for this package.
        (alist-delete 'configure %standard-phases))))
    (home-page "https://github.com/bedops/bedops")
    (synopsis "Tools for high-performance genomic feature operations")
    (description
     "BEDOPS is a suite of tools to address common questions raised in genomic
studies---mostly with regard to overlap and proximity relationships between
data sets. It aims to be scalable and flexible, facilitating the efficient
and accurate analysis and management of large-scale genomic data.

BEDOPS provides tools that perform highly efficient and scalable Boolean and
other set operations, statistical calculations, archiving, conversion and
other management of genomic data of arbitrary scale. Tasks can be easily
split by chromosome for distributing whole-genome analyses across a
computational cluster.")
    (license license:gpl2+)))
|
|
|
|
;; bedtools: "genome arithmetic" utilities.  There is no configure script and
;; no usable install target here: the Makefile's SHELL is patched by hand and
;; the programs built under "bin" are copied into the output.
(define-public bedtools
  (package
    (name "bedtools")
    (version "2.22.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/arq5x/bedtools2/archive/v"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "16aq0w3dmbd0853j32xk9jin4vb6v6fgakfyvrsmsjizzbn3fpfl"))))
    (build-system gnu-build-system)
    (native-inputs `(("python" ,python-2)))
    (inputs `(("samtools" ,samtools)
              ("zlib" ,zlib)))
    (arguments
     '(#:test-target "test"
       #:phases
       (alist-cons-after
        'unpack 'patch-makefile-SHELL-definition
        (lambda _
          ;; The stock patch-makefile-SHELL does not handle `:=' definitions
          ;; yet, and changing it would trigger a full rebuild, so the
          ;; relevant part of it is reimplemented here.
          (substitute* "Makefile"
            (("^SHELL := .*$")
             (string-append "SHELL := " (which "bash") " -e \n"))))
        (alist-delete
         'configure
         (alist-replace
          'install
          (lambda* (#:key outputs #:allow-other-keys)
            ;; Copy everything found under "bin" into <out>/bin/.
            (let* ((out     (assoc-ref outputs "out"))
                   (bin-dir (string-append out "/bin/")))
              (mkdir-p bin-dir)
              (for-each (lambda (program)
                          (copy-file program
                                     (string-append bin-dir
                                                    (basename program))))
                        (find-files "bin" ".*"))))
          %standard-phases)))))
    (home-page "https://github.com/arq5x/bedtools2")
    (synopsis "Tools for genome analysis and arithmetic")
    (description
     "Collectively, the bedtools utilities are a swiss-army knife of tools for
a wide-range of genomics analysis tasks. The most widely-used tools enable
genome arithmetic: that is, set theory on the genome. For example, bedtools
allows one to intersect, merge, count, complement, and shuffle genomic
intervals from multiple files in widely-used genomic file formats such as BAM,
BED, GFF/GTF, VCF.")
    (license license:gpl2)))
|
|
|
|
;; pybedtools: Python 2 wrapper around the bedtools programs.  bedtools and
;; samtools are propagated inputs of this package.
(define-public python2-pybedtools
  (package
    (name "python2-pybedtools")
    (version "0.6.9")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://pypi.python.org/packages/source/p/pybedtools/pybedtools-"
                    version ".tar.gz"))
              (sha256
               (base32
                "1ldzdxw1p4y3g2ignmggsdypvqkcwqwzhdha4rbgpih048z5p4an"))))
    (build-system python-build-system)
    (arguments `(#:python ,python-2)) ; no Python 3 support
    (inputs
     `(("python-cython" ,python2-cython)
       ("python-matplotlib" ,python2-matplotlib)))
    (propagated-inputs
     `(("bedtools" ,bedtools)
       ("samtools" ,samtools)))
    (native-inputs
     `(("python-pyyaml" ,python2-pyyaml)
       ("python-nose" ,python2-nose)
       ("python-setuptools" ,python2-setuptools)))
    (home-page "https://pythonhosted.org/pybedtools/")
    (synopsis "Python wrapper for BEDtools programs")
    (description
     "pybedtools is a Python wrapper for Aaron Quinlan's BEDtools programs,
which are widely used for genomic interval manipulation or \"genome algebra\".
pybedtools extends BEDTools by offering feature-level manipulations from within
Python.")
    (license license:gpl2+)))
|
|
|
|
;; Bowtie 2 read aligner.  The Makefile is adjusted in the origin snippet,
;; there is no configure script, and both the 'install and 'check phases are
;; replaced by hand-written ones.
(define-public bowtie
  (package
    (name "bowtie")
    (version "2.2.4")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/BenLangmead/bowtie2/archive/v"
                                  version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "15dnbqippwvhyh9zqjhaxkabk7lm1xbh1nvar1x4b5kwm117zijn"))
              (modules '((guix build utils)))
              (snippet
               ;; NOTE(review): none of these replacements ends in "\n"; if
               ;; the patterns also consume the line terminator, consecutive
               ;; Makefile lines get joined -- confirm this is intended
               ;; before touching them.
               '(substitute* "Makefile"
                  (("^CC = .*$") "CC = gcc")
                  (("^CPP = .*$") "CPP = g++")
                  ;; replace BUILD_HOST and BUILD_TIME for deterministic build
                  (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
                  (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\"")))
              (patches (list (search-patch "bowtie-fix-makefile.patch")))))
    (build-system gnu-build-system)
    (inputs `(("perl" ,perl)
              ("perl-clone" ,perl-clone)
              ("perl-test-deep" ,perl-test-deep)
              ("perl-test-simple" ,perl-test-simple)
              ("python" ,python-2)))
    (arguments
     '(#:make-flags '("allall")
       #:phases
       (alist-delete
        'configure
        (alist-replace
         'install
         (lambda* (#:key outputs #:allow-other-keys)
           ;; Copy every file matching "bowtie2.*" into <out>/bin/.
           (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
             (mkdir-p bin)
             (for-each (lambda (file)
                         (copy-file file (string-append bin file)))
                       (find-files "." "bowtie2.*"))))
         (alist-replace
          'check
          (lambda* (#:key outputs #:allow-other-keys)
            ;; 'system*' returns an exit status, and any integer (0
            ;; included) is a true value in Scheme.  Convert it to a
            ;; boolean so that a failing test run fails the phase.
            (zero? (system* "perl"
                            "scripts/test/simple_tests.pl"
                            "--bowtie2=./bowtie2"
                            "--bowtie2-build=./bowtie2-build")))
          %standard-phases)))))
    (home-page "http://bowtie-bio.sourceforge.net/bowtie2/index.shtml")
    (synopsis "Fast and sensitive nucleotide sequence read aligner")
    (description
     "Bowtie 2 is a fast and memory-efficient tool for aligning sequencing
reads to long reference sequences. It is particularly good at aligning reads
of about 50 up to 100s or 1,000s of characters, and particularly good at
aligning to relatively long (e.g. mammalian) genomes. Bowtie 2 indexes the
genome with an FM Index to keep its memory footprint small: for the human
genome, its memory footprint is typically around 3.2 GB. Bowtie 2 supports
gapped, local, and paired-end alignment modes.")
    (supported-systems '("x86_64-linux"))
    (license license:gpl3+)))
|
|
|
|
;; BWA: Burrows-Wheeler aligner.  The custom 'install phase copies the
;; executable, the README and the man page into the output by hand.
(define-public bwa
  (package
    (name "bwa")
    (version "0.7.12")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/bio-bwa/bwa-"
                           version ".tar.bz2"))
       (sha256
        (base32
         "1330dpqncv0px3pbhjzz1gwgg39kkcv2r9qp2xs0sixf8z8wl7bh"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no "check" target
       #:phases
       (alist-replace
        'install
        (lambda* (#:key outputs #:allow-other-keys)
          (let* ((out     (assoc-ref outputs "out"))
                 (bin-dir (string-append out "/bin"))
                 (doc-dir (string-append out "/share/doc/bwa"))
                 (man-dir (string-append out "/share/man/man1")))
            ;; Create the three target directories, then populate them.
            (mkdir-p bin-dir)
            (mkdir-p doc-dir)
            (mkdir-p man-dir)
            (copy-file "bwa" (string-append bin-dir "/bwa"))
            (copy-file "README.md" (string-append doc-dir "/README.md"))
            (copy-file "bwa.1" (string-append man-dir "/bwa.1"))))
        ;; no "configure" script
        (alist-delete 'configure %standard-phases))))
    (inputs `(("zlib" ,zlib)))
    (home-page "http://bio-bwa.sourceforge.net/")
    (synopsis "Burrows-Wheeler sequence aligner")
    (description
     "BWA is a software package for mapping low-divergent sequences against a
large reference genome, such as the human genome. It consists of three
algorithms: BWA-backtrack, BWA-SW and BWA-MEM. The first algorithm is
designed for Illumina sequence reads up to 100bp, while the rest two for
longer sequences ranged from 70bp to 1Mbp. BWA-MEM and BWA-SW share similar
features such as long-read support and split alignment, but BWA-MEM, which is
the latest, is generally recommended for high-quality queries as it is faster
and more accurate. BWA-MEM also has better performance than BWA-backtrack for
70-100bp Illumina reads.")
    (license license:gpl3+)))
|
|
|
|
;; bx-python for Python 2.  The origin snippet edits setup.py so that it no
;; longer imports and calls the "distribute" bootstrap helper.
(define-public python2-bx-python
  (package
    (name "python2-bx-python")
    (version "0.7.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://pypi.python.org/packages/source/b/bx-python/bx-python-"
             version ".tar.gz"))
       (sha256
        (base32
         "0ld49idhc5zjdvbhvjq1a2qmpjj7h5v58rqr25dzmfq7g34b50xh"))
       (modules '((guix build utils)))
       (snippet
        ;; remove dependency on outdated "distribute" module
        '(substitute* "setup.py"
           (("^from distribute_setup import use_setuptools") "")
           (("^use_setuptools\\(\\)") "")))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       #:tests? #f))       ;tests fail because test data are not included
    (native-inputs
     `(("python-nose" ,python2-nose)
       ("python-setuptools" ,python2-setuptools)))
    (inputs
     `(("python-numpy" ,python2-numpy)
       ("zlib" ,zlib)))
    (home-page "http://bitbucket.org/james_taylor/bx-python/")
    (synopsis "Tools for manipulating biological data")
    (description
     "bx-python provides tools for manipulating biological data, particularly
multiple sequence alignments.")
    (license license:expat)))
|
|
|
|
;; CLIPper peak caller (Python 2 only).  The origin snippet removes the
;; "setup_requires" line from setup.py.
(define-public clipper
  (package
    (name "clipper")
    (version "0.3.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/YeoLab/clipper/archive/"
                           version ".tar.gz"))
       (sha256
        (base32
         "1q7jpimsqln7ic44i8v2rx2haj5wvik8hc1s2syd31zcn0xk1iyq"))
       (modules '((guix build utils)))
       (snippet
        ;; remove unnecessary setup dependency
        '(substitute* "setup.py"
           (("setup_requires = .*") "")))))
    (build-system python-build-system)
    (arguments `(#:python ,python-2))   ; only Python 2 is supported
    (native-inputs
     `(("python-mock" ,python2-mock)    ; for tests
       ("python-pytz" ,python2-pytz)    ; for tests
       ("python-setuptools" ,python2-setuptools)))
    (inputs
     `(("htseq" ,htseq)
       ("python-pybedtools" ,python2-pybedtools)
       ("python-cython" ,python2-cython)
       ("python-scikit-learn" ,python2-scikit-learn)
       ("python-matplotlib" ,python2-matplotlib)
       ("python-pysam" ,python2-pysam)
       ("python-numpy" ,python2-numpy)
       ("python-scipy" ,python2-scipy)))
    (home-page "https://github.com/YeoLab/clipper")
    (synopsis "CLIP peak enrichment recognition")
    (description
     "CLIPper is a tool to define peaks in CLIP-seq datasets.")
    (license license:gpl2)))
|
|
|
|
;; Clustal Omega multiple sequence aligner; a plain gnu-build-system package
;; whose only declared input is argtable.
(define-public clustal-omega
  (package
    (name "clustal-omega")
    (version "1.2.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://www.clustal.org/omega/clustal-omega-"
                           version ".tar.gz"))
       (sha256
        (base32
         "02ibkx0m0iwz8nscg998bh41gg251y56cgh86bvyrii5m8kjgwqf"))))
    (build-system gnu-build-system)
    (inputs `(("argtable" ,argtable)))
    (home-page "http://www.clustal.org/omega/")
    (synopsis "Multiple sequence aligner for protein and DNA/RNA")
    (description
     "Clustal-Omega is a general purpose multiple sequence alignment (MSA)
program for protein and DNA/RNA. It produces high quality MSAs and is capable
of handling data-sets of hundreds of thousands of sequences in reasonable
time.")
    (license license:gpl2+)))
|
|
|
|
;; CrossMap (Python 2).  The bundled pysam copy is deleted from the source
;; and CROSSMAP_USE_SYSTEM_PYSAM is set in the environment after unpacking.
(define-public crossmap
  (package
    (name "crossmap")
    (version "0.1.6")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/crossmap/CrossMap-"
                           version ".tar.gz"))
       (sha256
        (base32
         "163hi5gjgij6cndxlvbkp5jjwr0k4wbm9im6d2210278q7k9kpnp"))
       ;; patch has been sent upstream already
       (patches
        (list (search-patch "crossmap-allow-system-pysam.patch")))
       (modules '((guix build utils)))
       ;; remove bundled copy of pysam
       (snippet '(delete-file-recursively "lib/pysam"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       #:phases
       (alist-cons-after
        'unpack 'set-env
        (lambda _
          (setenv "CROSSMAP_USE_SYSTEM_PYSAM" "1"))
        %standard-phases)))
    (native-inputs
     `(("python-cython" ,python2-cython)
       ("python-nose" ,python2-nose)
       ("python-setuptools" ,python2-setuptools)))
    (inputs
     `(("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("zlib" ,zlib)))
    (home-page "http://crossmap.sourceforge.net/")
    (synopsis "Convert genome coordinates between assemblies")
    (description
     "CrossMap is a program for conversion of genome coordinates or annotation
files between different genome assemblies. It supports most commonly used
file formats including SAM/BAM, Wiggle/BigWig, BED, GFF/GTF, VCF.")
    (license license:gpl2+)))
|
|
|
|
;; Cutadapt adapter trimmer.  The standard 'check phase is removed and a new
;; one is added after 'install, with the installed site-packages directory
;; appended to PYTHONPATH.
(define-public cutadapt
  (package
    (name "cutadapt")
    (version "1.8")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/marcelm/cutadapt/archive/v"
                    version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "161bp87y6gd6r5bmvjpn2b1k942i3fizfpa139f0jn6jv1wcp5h5"))))
    (build-system python-build-system)
    (arguments
     ;; tests must be run after install
     `(#:phases (alist-cons-after
                 'install 'check
                 (lambda* (#:key inputs outputs #:allow-other-keys)
                   ;; Derive "MAJOR.MINOR" from the version suffix of the
                   ;; "python" input's file name instead of slicing a fixed
                   ;; number of characters, so that version strings of any
                   ;; length work.  Assumes the file name ends in
                   ;; "-MAJOR.MINOR[.PATCH...]".
                   (let* ((python  (assoc-ref inputs "python"))
                          (version (substring python
                                              (+ 1 (string-rindex python #\-))))
                          (parts   (string-split version #\.))
                          (major+minor (string-append (car parts) "."
                                                      (cadr parts))))
                     (setenv "PYTHONPATH"
                             (string-append
                              (getenv "PYTHONPATH")
                              ":" (assoc-ref outputs "out")
                              "/lib/python"
                              major+minor
                              "/site-packages"))
                     (zero? (system* "nosetests" "-P" "tests"))))
                 (alist-delete 'check %standard-phases))))
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-nose" ,python-nose)
       ("python-setuptools" ,python-setuptools)))
    (home-page "https://code.google.com/p/cutadapt/")
    (synopsis "Remove adapter sequences from nucleotide sequencing reads")
    (description
     "Cutadapt finds and removes adapter sequences, primers, poly-A tails and
other types of unwanted sequence from high-throughput sequencing reads.")
    (license license:expat)))
|
|
|
|
;; Flexbar, built with CMake.  FLEXBAR_BINARY_DIR is pointed at <out>/bin/
;; and the standard 'install phase is dropped.
(define-public flexbar
  (package
    (name "flexbar")
    (version "2.5")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/flexbar/"
                           version "/flexbar_v" version "_src.tgz"))
       (sha256
        (base32
         "13jaykc3y1x8y5nn9j8ljnb79s5y51kyxz46hdmvvjj6qhyympmf"))))
    (build-system cmake-build-system)
    (arguments
     `(#:configure-flags
       (list (string-append "-DFLEXBAR_BINARY_DIR="
                            (assoc-ref %outputs "out")
                            "/bin/"))
       ;; There is no test target, although there is a directory containing
       ;; test data and scripts (launched by flexbar_validate.sh).
       #:tests? #f
       #:phases
       (alist-delete 'install %standard-phases)))
    (native-inputs
     `(("pkg-config" ,pkg-config)
       ("seqan" ,seqan)))
    (inputs
     `(("tbb" ,tbb)
       ("zlib" ,zlib)))
    (home-page "http://flexbar.sourceforge.net")
    (synopsis "Barcode and adapter removal tool for sequencing platforms")
    (description
     "Flexbar preprocesses high-throughput nucleotide sequencing data
efficiently. It demultiplexes barcoded runs and removes adapter sequences.
Moreover, trimming and filtering features are provided. Flexbar increases
read mapping rates and improves genome and transcriptome assemblies. It
supports next-generation sequencing data in fasta/q and csfasta/q format from
Illumina, Roche 454, and the SOLiD platform.")
    (license license:gpl3)))
|
|
|
|
;; HISAT spliced aligner.  The source is a zip archive, so the Makefile is
;; patched in a build phase rather than in an origin snippet; there is no
;; configure script and the install step is done by hand.
(define-public hisat
  (package
    (name "hisat")
    (version "0.1.4")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://ccb.jhu.edu/software/hisat/downloads/hisat-"
             version "-beta-source.zip"))
       (sha256
        (base32
         "1k381ydranqxp09yf2y7w1d0chz5d59vb6jchi89hbb0prq19lk5"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no check target
       #:make-flags '("allall"
                      ;; Disable unsupported `popcnt' instructions on
                      ;; architectures other than x86_64
                      ,@(if (string-prefix? "x86_64"
                                            (or (%current-target-system)
                                                (%current-system)))
                            '()
                            '("POPCNT_CAPABILITY=0")))
       #:phases
       (alist-cons-after
        'unpack 'patch-sources
        (lambda _
          ;; XXX Cannot use snippet because zip files are not supported
          (substitute* "Makefile"
            (("^CC = .*$") "CC = gcc")
            (("^CPP = .*$") "CPP = g++")
            ;; replace BUILD_HOST and BUILD_TIME for deterministic build
            (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
            (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\""))
          ;; Point the wrapper scripts at the "env" found in PATH.
          (substitute* '("hisat-build" "hisat-inspect")
            (("/usr/bin/env") (which "env"))))
        (alist-replace
         'install
         (lambda* (#:key outputs #:allow-other-keys)
           ;; Copy the files selected by the regexp below into <out>/bin/.
           (let* ((out     (assoc-ref outputs "out"))
                  (bin-dir (string-append out "/bin/")))
             (mkdir-p bin-dir)
             (for-each
              (lambda (program)
                (copy-file program (string-append bin-dir program)))
              (find-files
               "."
               "hisat(-(build|align|inspect)(-(s|l)(-debug)*)*)*$"))))
         (alist-delete 'configure %standard-phases)))))
    (native-inputs
     `(("unzip" ,unzip)))
    (inputs
     `(("perl" ,perl)
       ("python" ,python)
       ("zlib" ,zlib)))
    (home-page "http://ccb.jhu.edu/software/hisat/index.shtml")
    (synopsis "Hierarchical indexing for spliced alignment of transcripts")
    (description
     "HISAT is a fast and sensitive spliced alignment program for mapping
RNA-seq reads. In addition to one global FM index that represents a whole
genome, HISAT uses a large set of small FM indexes that collectively cover the
whole genome. These small indexes (called local indexes) combined with
several alignment strategies enable effective alignment of RNA-seq reads, in
particular, reads spanning multiple exons.")
    (license license:gpl3+)))
|
|
|
|
;; HTSeq, fetched from PyPI and built with Python 2.
(define-public htseq
  (package
    (name "htseq")
    (version "0.6.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://pypi.python.org/packages/source/H/HTSeq/HTSeq-"
             version ".tar.gz"))
       (sha256
        (base32
         "1i85ppf2j2lj12m0x690qq5nn17xxk23pbbx2c83r8ayb5wngzwv"))))
    (build-system python-build-system)
    (arguments `(#:python ,python-2))   ; only Python 2 is supported
    (inputs
     `(("python-numpy" ,python2-numpy)
       ("python-setuptools" ,python2-setuptools)))
    (home-page "http://www-huber.embl.de/users/anders/HTSeq/")
    (synopsis "Analysing high-throughput sequencing data with Python")
    (description
     "HTSeq is a Python package that provides infrastructure to process data
from high-throughput sequencing assays.")
    (license license:gpl3+)))
|
|
|
|
;; HTSJDK Java library.  The only phase doing real work is a replacement
;; 'build phase that runs "ant all" with the "dist" property pointing into
;; the output; 'configure, 'install and 'check are removed.
(define-public htsjdk
  (package
    (name "htsjdk")
    (version "1.129")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/samtools/htsjdk/archive/"
                    version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "0asdk9b8jx2ij7yd6apg9qx03li8q7z3ml0qy2r2qczkra79y6fw"))
              (modules '((guix build utils)))
              ;; remove build dependency on git
              (snippet '(substitute* "build.xml"
                          (("failifexecutionfails=\"true\"")
                           "failifexecutionfails=\"false\"")))))
    (build-system gnu-build-system)
    (arguments
     ;; (srfi srfi-1) is needed on the build side for 'fold'.
     `(#:modules ((srfi srfi-1)
                  (guix build gnu-build-system)
                  (guix build utils))
       #:phases (alist-replace
                 'build
                 (lambda _
                   (setenv "JAVA_HOME" (assoc-ref %build-inputs "jdk"))
                   (zero? (system* "ant" "all"
                                   (string-append "-Ddist="
                                                  (assoc-ref %outputs "out")
                                                  "/share/java/htsjdk/"))))
                 (fold alist-delete %standard-phases
                       '(configure install check)))))
    (native-inputs
     `(("ant" ,ant)
       ("jdk" ,icedtea6 "jdk")))
    (home-page "http://samtools.github.io/htsjdk/")
    (synopsis "Java API for high-throughput sequencing data (HTS) formats")
    (description
     "HTSJDK is an implementation of a unified Java library for accessing
common file formats, such as SAM and VCF, used for high-throughput
sequencing (HTS) data. There are also a number of useful utilities for
manipulating HTS data.")
    (license license:expat)))
|
|
|
|
;; MACS2 ChIP-Seq peak caller, fetched from PyPI and built with Python 2;
;; tests are disabled.
(define-public macs
  (package
    (name "macs")
    (version "2.1.0.20140616")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://pypi.python.org/packages/source/M/MACS2/MACS2-"
             version ".tar.gz"))
       (sha256
        (base32
         "11lmiw6avqhwn75sn59g4lfkrr2kk20r3rgfbx9xfqb8rg9mi2n6"))))
    (build-system python-build-system)
    (arguments
     `(#:tests? #f                      ; no test target
       #:python ,python-2))             ; only compatible with Python 2.7
    (native-inputs
     `(("python-setuptools" ,python2-setuptools)))
    (inputs
     `(("python-numpy" ,python2-numpy)))
    (home-page "http://github.com/taoliu/MACS/")
    (synopsis "Model based analysis for ChIP-Seq data")
    (description
     "MACS is an implementation of a ChIP-Seq analysis algorithm for
identifying transcript factor binding sites named Model-based Analysis of
ChIP-Seq (MACS). MACS captures the influence of genome complexity to evaluate
the significance of enriched ChIP regions and it improves the spatial
resolution of binding sites through combining the information of both
sequencing tag position and orientation.")
    (license license:bsd-3)))
|
|
|
|
;; MISO (misopy), Python 2 only.  The origin snippet prepends a
;; cc.set_executables(...) call to the "defines" line of setup.py so that
;; "gcc" is used as compiler and linker.
(define-public miso
  (package
    (name "miso")
    (version "0.5.3")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://pypi.python.org/packages/source/m/misopy/misopy-"
             version ".tar.gz"))
       (sha256
        (base32
         "0x446867az8ir0z8c1vjqffkp0ma37wm4sylixnkhgawllzx8v5w"))
       (modules '((guix build utils)))
       ;; use "gcc" instead of "cc" for compilation
       (snippet
        '(substitute* "setup.py"
           (("^defines")
            "cc.set_executables(
compiler='gcc',
compiler_so='gcc',
linker_exe='gcc',
linker_so='gcc -shared'); defines")))))
    (build-system python-build-system)
    (arguments
     `(#:tests? #f                      ; no "test" target
       #:python ,python-2))             ; only Python 2 is supported
    (native-inputs
     `(("python-setuptools" ,python2-setuptools)))
    (inputs
     `(("samtools" ,samtools)
       ("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("python-scipy" ,python2-scipy)
       ("python-matplotlib" ,python2-matplotlib)))
    (home-page "http://genes.mit.edu/burgelab/miso/index.html")
    (synopsis "Mixture of Isoforms model for RNA-Seq isoform quantitation")
    (description
     "MISO (Mixture-of-Isoforms) is a probabilistic framework that quantitates
the expression level of alternatively spliced genes from RNA-Seq data, and
identifies differentially regulated isoforms or exons across samples. By
modeling the generative process by which reads are produced from isoforms in
RNA-Seq, the MISO model uses Bayesian inference to compute the probability
that a read originated from a particular isoform.")
    (license license:gpl2)))
|
|
|
|
;; pbcore for Python 2, fetched as a release archive from GitHub.
(define-public python2-pbcore
  (package
    (name "python2-pbcore")
    (version "0.9.3")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/PacificBiosciences/pbcore/archive/"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1z46rwjac93jm87cbj2zgjg6qvsgs65140wkbbxsvxps7ai4pm09"))))
    (build-system python-build-system)
    (arguments `(#:python ,python-2))   ; pbcore requires Python 2.7
    (native-inputs
     `(("python-setuptools" ,python2-setuptools)))
    (inputs
     `(("python-cython" ,python2-cython)
       ("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("python-h5py" ,python2-h5py)))
    (home-page "http://pacificbiosciences.github.io/pbcore/")
    (synopsis "Library for reading and writing PacBio data files")
    (description
     "The pbcore package provides Python APIs for interacting with PacBio data
files and writing bioinformatics applications.")
    (license license:bsd-3)))
|
|
|
|
;; pbtranscript-tofu, built from a pinned Git commit of the cDNA_primer
;; repository.  After unpacking, the build enters the pbtranscript
;; sub-directory and deletes the bundled build artefacts listed below.
(define-public pbtranscript-tofu
  (let ((commit "c7bbd5472"))
    (package
      (name "pbtranscript-tofu")
      (version (string-append "0.4.1." commit))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/PacificBiosciences/cDNA_primer.git")
                      (commit commit)))
                ;; A Git checkout is a directory, not a tarball, so do not
                ;; give it a ".tar.gz" name.
                (file-name (string-append name "-" version "-checkout"))
                (sha256
                 (base32
                  "148xkzi689c49g6fdhckp6mnmj2qhjdf1j4wifm6ja7ij95d7fxx"))))
      (build-system python-build-system)
      (arguments
       `(#:python ,python-2
         ;; With standard flags, the install phase attempts to create a zip'd
         ;; egg file, and fails with an error: 'ZIP does not support timestamps
         ;; before 1980'
         #:configure-flags '("--single-version-externally-managed"
                             "--record=pbtranscript-tofu.txt")
         #:phases
         (alist-cons-after
          'unpack 'enter-directory-and-clean-up
          (lambda _
            (chdir "pbtranscript-tofu/pbtranscript/")
            ;; Delete clutter
            (delete-file-recursively "dist/")
            (delete-file-recursively "build/")
            (delete-file-recursively "setuptools_cython-0.2.1-py2.6.egg/")
            (delete-file-recursively "pbtools.pbtranscript.egg-info")
            (delete-file "Cython-0.20.1.tar.gz")
            (delete-file "setuptools_cython-0.2.1-py2.7.egg")
            (delete-file "setuptools_cython-0.2.1.tar.gz")
            (delete-file "setup.cfg")
            ;; Remove every "*.so" file found in the tree.
            (for-each delete-file
                      (find-files "." "\\.so$"))
            ;; files should be writable for install phase
            (for-each (lambda (f) (chmod f #o755))
                      (find-files "." "\\.py$")))
          %standard-phases)))
      (inputs
       `(("python-cython" ,python2-cython)
         ("python-numpy" ,python2-numpy)
         ("python-bx-python" ,python2-bx-python)
         ("python-networkx" ,python2-networkx)
         ("python-scipy" ,python2-scipy)
         ("python-pbcore" ,python2-pbcore)))
      (native-inputs
       `(("python-nose" ,python2-nose)
         ("python-setuptools" ,python2-setuptools)))
      (home-page "https://github.com/PacificBiosciences/cDNA_primer")
      (synopsis "Analyze transcriptome data generated with the Iso-Seq protocol")
      (description
       "pbtranscript-tofu contains scripts to analyze transcriptome data
generated using the PacBio Iso-Seq protocol.")
      (license license:bsd-3))))
|
|
|
|
;; RSeQC (Python 2).  The origin snippet deletes the bundled pysam and edits
;; setup.py so that it neither bootstraps "distribute" nor falls back to the
;; bundled pysam.
(define-public rseqc
  (package
    (name "rseqc")
    (version "2.6.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/rseqc/"
                           version "/RSeQC-" version ".tar.gz"))
       (sha256
        (base32 "09rf0x9d6apjja5l01cgprj7vigpw6kiqhy34ibwwlxil0db0ri4"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; remove bundled copy of pysam
           (delete-file-recursively "lib/pysam")
           (substitute* "setup.py"
             ;; remove dependency on outdated "distribute" module
             (("^from distribute_setup import use_setuptools") "")
             (("^use_setuptools\\(\\)") "")
             ;; do not use bundled copy of pysam
             (("^have_pysam = False") "have_pysam = True"))))))
    (build-system python-build-system)
    (arguments `(#:python ,python-2))
    (native-inputs
     `(("python-nose" ,python2-nose)))
    (inputs
     `(("python-cython" ,python2-cython)
       ("python-pysam" ,python2-pysam)
       ("python-numpy" ,python2-numpy)
       ("python-setuptools" ,python2-setuptools)
       ("zlib" ,zlib)))
    (home-page "http://rseqc.sourceforge.net/")
    (synopsis "RNA-seq quality control package")
    (description
     "RSeQC provides a number of modules that can comprehensively evaluate
high throughput sequence data, especially RNA-seq data. Some basic modules
inspect sequence quality, nucleotide composition bias, PCR bias and GC bias,
while RNA-seq specific modules evaluate sequencing saturation, mapped reads
distribution, coverage uniformity, strand specificity, etc.")
    (license license:gpl3+)))
|
|
|
|
;; Samtools.  There is no configure script; the Makefile is patched to link
;; against ncurses and the test driver is patched to use the store's bash and
;; to skip the usage test for the "stats" subcommand.
(define-public samtools
  (package
    (name "samtools")
    (version "1.1")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "mirror://sourceforge/samtools/"
                       version "/samtools-" version ".tar.bz2"))
       (sha256
        (base32
         "1y5p2hs4gif891b4ik20275a8xf3qrr1zh9wpysp4g8m0g1jckf2"))))
    (build-system gnu-build-system)
    (arguments
     `(;; There are 87 test failures when building on non-64-bit architectures
       ;; due to invalid test data.  This has since been fixed upstream (see
       ;; <https://github.com/samtools/samtools/pull/307>), but as there has
       ;; not been a new release we only run the tests when building for
       ;; x86_64-linux.  The target system is consulted first so that cross
       ;; builds are judged by the system they are built for.
       #:tests? ,(string=? (or (%current-target-system) (%current-system))
                           "x86_64-linux")
       #:make-flags (list (string-append "prefix=" (assoc-ref %outputs "out")))
       #:phases
       (alist-cons-after
        'unpack
        'patch-makefile-curses
        (lambda _
          (substitute* "Makefile"
            (("-lcurses") "-lncurses")))
        (alist-cons-after
         'unpack
         'patch-tests
         (lambda* (#:key inputs #:allow-other-keys)
           (let ((bash (assoc-ref inputs "bash")))
             (substitute* "test/test.pl"
               ;; The test script calls out to /bin/bash
               (("/bin/bash")
                (string-append bash "/bin/bash"))
               ;; There are two failing tests upstream relating to the "stats"
               ;; subcommand in test_usage_subcommand ("did not have Usage"
               ;; and "usage did not mention samtools stats"), so we disable
               ;; them.
               (("(test_usage_subcommand\\(.*\\);)" cmd)
                (string-append "unless ($subcommand eq 'stats') {" cmd "};")))))
         (alist-delete
          'configure
          %standard-phases)))))
    (native-inputs `(("pkg-config" ,pkg-config)))
    (inputs `(("ncurses" ,ncurses)
              ("perl" ,perl)
              ("python" ,python)
              ("zlib" ,zlib)))
    (home-page "http://samtools.sourceforge.net")
    (synopsis "Utilities to efficiently manipulate nucleotide sequence alignments")
    (description
     "Samtools implements various utilities for post-processing nucleotide
sequence alignments in the SAM, BAM, and CRAM formats, including indexing,
variant calling (in conjunction with bcftools), and a simple alignment
viewer.")
    (license license:expat)))
|
|
|
|
(define-public seqan
  ;; Package definition for the SeqAn library 1.4.2.  Nothing is compiled:
  ;; the builder unpacks the tarball and copies the "include" and "share"
  ;; directories into the "out" and "doc" outputs respectively.
  (package
    (name "seqan")
    (version "1.4.2")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://packages.seqan.de/seqan-library/"
                                  "seqan-library-" version ".tar.bz2"))
              (sha256
               (base32
                "05s3wrrwn50f81aklfm65i4a749zag1vr8z03k21xm0pdxy47yvp"))))
    ;; The documentation is 7.8MB and the includes are 3.6MB heavy, so it
    ;; makes sense to split the outputs.
    (outputs '("out" "doc"))
    (build-system trivial-build-system)
    (arguments
     `(#:modules ((guix build utils))
       #:builder
       (begin
         (use-modules (guix build utils))
         (let ((tar  (assoc-ref %build-inputs "tar"))
               (bzip (assoc-ref %build-inputs "bzip2"))
               (out  (assoc-ref %outputs "out"))
               (doc  (assoc-ref %outputs "doc")))
           ;; PATH is set to the tar and bzip2 inputs only, so that the
           ;; "tar" command below (and the decompressor it runs) is found.
           (setenv "PATH" (string-append tar "/bin:" bzip "/bin"))
           ;; Abort right away when extraction fails instead of going on
           ;; with a missing or partially unpacked source tree.
           (unless (zero? (system* "tar" "xvf"
                                   (assoc-ref %build-inputs "source")))
             (error "failed to unpack source tarball"
                    (assoc-ref %build-inputs "source")))
           (chdir (string-append "seqan-library-" ,version))
           (copy-recursively "include" (string-append out "/include"))
           (copy-recursively "share" (string-append doc "/share"))))))
    (native-inputs
     `(("source" ,source)
       ("tar" ,tar)
       ("bzip2" ,bzip2)))
    (home-page "http://www.seqan.de")
    (synopsis "Library for nucleotide sequence analysis")
    (description
     "SeqAn is a C++ library of efficient algorithms and data structures for
the analysis of sequences with the focus on biological data. It contains
algorithms and data structures for string representation and their
manipulation, online and indexed string search, efficient I/O of
bioinformatics file formats, sequence alignment, and more.")
    (license license:bsd-3)))
|
|
|
|
(define-public star
  ;; Package definition for the STAR aligner 2.4.0j.  The 'configure phase
  ;; is removed, the build runs from the "source" sub-directory, and the
  ;; resulting "STAR" executable is installed by a custom 'install phase.
  (package
    (name "star")
    (version "2.4.0j")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/alexdobin/STAR/archive/STAR_"
             version ".tar.gz"))
       (sha256
        (base32
         "1y3bciych1aw6s7k8sy1saj23dcan9wk4d4f96an499slkxwz712"))
       (modules '((guix build utils)))
       ;; Call "rm" by name rather than through the hard-coded /bin/rm.
       (snippet
        '(substitute* "source/Makefile"
           (("/bin/rm") "rm")))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no check target
       ;; Build only the "STAR" make target.
       #:make-flags '("STAR")
       #:phases
       (alist-cons-after
        'unpack 'enter-source-dir
        (lambda _
          ;; The Makefile lives in the "source" sub-directory.
          (chdir "source"))
        (alist-replace
         'install
         (lambda* (#:key outputs #:allow-other-keys)
           ;; Copy the freshly built executable into <out>/bin/.
           (let* ((out     (assoc-ref outputs "out"))
                  (bin-dir (string-append out "/bin/")))
             (mkdir-p bin-dir)
             (copy-file "STAR" (string-append bin-dir "STAR"))))
         (alist-delete
          'configure %standard-phases)))))
    (native-inputs
     `(("vim" ,vim)))                   ;for xxd
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/alexdobin/STAR")
    (synopsis "Universal RNA-seq aligner")
    (description
     "The Spliced Transcripts Alignment to a Reference (STAR) software is
based on a previously undescribed RNA-seq alignment algorithm that uses
sequential maximum mappable seed search in uncompressed suffix arrays followed
by seed clustering and stitching procedure. In addition to unbiased de novo
detection of canonical junctions, STAR can discover non-canonical splices and
chimeric (fusion) transcripts, and is also capable of mapping full-length RNA
sequences.")
    ;; STAR is licensed under GPLv3 or later; htslib is MIT-licensed.
    (license license:gpl3+)))
|
|
|
|
(define-public vcftools
  ;; Package definition for VCFtools 0.1.12b.  The 'configure phase is
  ;; removed; the installation prefix and manual page directory are passed
  ;; to make as PREFIX and MANDIR.
  (package
    (name "vcftools")
    (version "0.1.12b")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "mirror://sourceforge/vcftools/vcftools_"
             version ".tar.gz"))
       (sha256
        (base32
         "148al9h7f8g8my2qdnpax51kdd2yjrivlx6frvakf4lz5r8j88wx"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no "check" target
       #:make-flags
       ;; Both flags point below the "out" output, so look it up once.
       (let ((out (assoc-ref %outputs "out")))
         (list (string-append "PREFIX=" out)
               (string-append "MANDIR=" out "/share/man/man1")))
       #:phases
       (alist-cons-after
        'unpack 'patch-manpage-install
        (lambda _
          ;; Take the manual page from the unpacked source tree (./cpp)
          ;; rather than from a path below ${PREFIX}.
          (substitute* "Makefile"
            (("cp \\$\\{PREFIX\\}/cpp/vcftools.1") "cp ./cpp/vcftools.1")))
        (alist-delete 'configure %standard-phases))))
    (inputs
     `(("perl" ,perl)
       ("zlib" ,zlib)))
    (home-page "http://vcftools.sourceforge.net/")
    (synopsis "Tools for working with VCF files")
    (description
     "VCFtools is a program package designed for working with VCF files, such
as those generated by the 1000 Genomes Project. The aim of VCFtools is to
provide easily accessible methods for working with complex genetic variation
data in the form of VCF files.")
    ;; The license is declared as LGPLv3 in the README and
    ;; at http://vcftools.sourceforge.net/license.html
    (license license:lgpl3)))
|