gnu: Add sentencepiece.
* gnu/packages/machine-learning.scm (sentencepiece): New variable.

Signed-off-by: Nicolas Goaziou <mail@nicolasgoaziou.fr>
This commit is contained in:
parent
46c4c6cae4
commit
70510eb047
1 changed files with 28 additions and 0 deletions
|
@@ -583,6 +583,34 @@ (define openfst-for-vosk
|
||||||
'("--enable-shared" "--enable-far" "--enable-ngram-fsts"
|
'("--enable-shared" "--enable-far" "--enable-ngram-fsts"
|
||||||
"--enable-lookahead-fsts" "--with-pic" "--disable-bin")))))
|
"--enable-lookahead-fsts" "--with-pic" "--disable-bin")))))
|
||||||
|
|
||||||
|
;; Package definition for SentencePiece 0.1.97.  The source is fetched from
;; the upstream Git repository and built with the CMake build system.
(define-public sentencepiece
  (package
    (name "sentencepiece")
    (version "0.1.97")
    (source
     (origin
       (method git-fetch)
       (uri
        (git-reference
         (url "https://github.com/google/sentencepiece")
         ;; The commit fetched is the version string prefixed with "v".
         (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1kzfkp2pk0vabyw3wmkh16h11chzq63mzc20ddhsag5fp6s91ajg"))))
    (build-system cmake-build-system)
    ;; Tests are switched off; the package is recorded as having no tests.
    (arguments
     (list #:tests? #f))
    ;; NOTE(review): gperftools is declared as a native input.  If the build
    ;; links one of its libraries into the installed outputs, it would
    ;; presumably belong in `inputs' instead -- confirm against upstream's
    ;; CMake configuration before changing.
    (native-inputs
     (list gperftools))
    (home-page "https://github.com/google/sentencepiece")
    (synopsis "Unsupervised tokenizer for Neural Network-based text generation")
    (description
     "SentencePiece is an unsupervised text tokenizer and detokenizer mainly
for Neural Network-based text generation systems where the vocabulary size is
predetermined prior to the neural model training. SentencePiece implements
subword units---e.g., byte-pair-encoding (BPE) and unigram language
model---with the extension of direct training from raw sentences.
SentencePiece allows us to make a purely end-to-end system that does not
depend on language-specific pre- or post-processing.")
    (license license:asl2.0)))
|
||||||
|
|
||||||
(define-public shogun
|
(define-public shogun
|
||||||
(package
|
(package
|
||||||
(name "shogun")
|
(name "shogun")
|
||||||
|
|
Loading…
Reference in a new issue