gnu: Add python-html-text.

* gnu/packages/python-web.scm (python-html-text): New variable.
This commit is contained in:
Maxim Cournoyer 2022-05-02 00:39:09 -04:00
parent 77afe03cf9
commit 32ffbb16e8
No known key found for this signature in database
GPG key ID: 1260E46482E63562

View file

@@ -7387,3 +7387,25 @@ (define-public python-jstyleson
Contrary to the standard Python @code{json} library, it understands js-style
comments. Trailing comma is also supported.")
(license license:expat)))
;; Guix package for the Python "html_text" library, version 0.5.2; the source
;; tarball is fetched from PyPI and verified against the SHA-256 hash below.
(define-public python-html-text
  (package
    (name "python-html-text")
    (version "0.5.2")
    (source
     (origin
       (method url-fetch)
       ;; The PyPI project name uses an underscore ("html_text"), unlike the
       ;; Guix package name.
       (uri (pypi-uri "html_text" version))
       (sha256
        (base32 "1v9x171l3bmyayc1144nrkn9410lp4lhlrrjii54j7b5f2xipmmg"))))
    (build-system python-build-system)
    ;; NOTE(review): pytest is only listed as an input here; no custom 'check
    ;; phase is defined, so confirm the default phase actually invokes it.
    (native-inputs (list python-pytest))
    (propagated-inputs (list python-lxml))
    (home-page "https://github.com/TeamHG-Memex/html-text")
    (synopsis "Extract text from HTML")
    (description "HTML to Text is a Python library for extracting text from
HTML.  Contrary to other solutions such as LXML or Beautiful Soup, the text
extracted with @code{html_text} does not contain elements such as JavaScript
or inline styles not normally visible to users.  It also normalizes white
space characters in a smarter, more visually pleasing style.")
    (license license:expat)))