gnu-maintenance: Add 'generic-html' updater.

This brings total updater coverage, as reported by 'guix refresh
--list-updaters', from 78% to 88.3%.  Among many other things, it covers
freedesktop.org packages.

* guix/gnu-maintenance.scm (html-updatable-package?)
(latest-html-updatable-release): New procedures.
(%generic-html-updater): New variable.
* doc/guix.texi (Invoking guix refresh): Document it.
This commit is contained in:
Ludovic Courtès 2021-03-13 22:25:12 +01:00
parent 1d5a946ce5
commit af9af2180e
No known key found for this signature in database
GPG Key ID: 090B11993D9AEBB5
2 changed files with 62 additions and 1 deletions

View File

@ -11707,6 +11707,9 @@ the updater for @uref{https://www.stackage.org, Stackage} packages.
the updater for @uref{https://crates.io, Crates} packages.
@item launchpad
the updater for @uref{https://launchpad.net, Launchpad} packages.
@item generic-html
a generic updater that crawls the HTML page where the source tarball of
the package is hosted, when applicable.
@end table
For instance, the following command only checks for updates of Emacs

View File

@ -28,6 +28,7 @@
#:use-module (srfi srfi-1)
#:use-module (srfi srfi-11)
#:use-module (srfi srfi-26)
#:use-module (srfi srfi-34)
#:use-module (rnrs io ports)
#:use-module (system foreign)
#:use-module (guix http-client)
@ -66,7 +67,8 @@
%gnu-ftp-updater
%savannah-updater
%xorg-updater
%kernel.org-updater))
%kernel.org-updater
%generic-html-updater))
;;; Commentary:
;;;
@ -697,6 +699,55 @@ releases are on gnu.org."
#:file->signature file->signature)
(cut adjusted-upstream-source <> rewrite))))
(define html-updatable-package?
;; Return true if the given package may be handled by the generic HTML
;; updater.
(let ((hosting-sites '("github.com" "github.io" "gitlab.com"
"notabug.org" "sr.ht"
"gforge.inria.fr" "gitlab.inria.fr"
"ftp.gnu.org" "download.savannah.gnu.org"
"pypi.org" "crates.io" "rubygems.org"
"bioconductor.org")))
(url-predicate (lambda (url)
(match (string->uri url)
(#f #f)
(uri
(let ((scheme (uri-scheme uri))
(host (uri-host uri)))
(and (memq scheme '(http https))
(not (member host hosting-sites))))))))))
(define (latest-html-updatable-release package)
"Return the latest release of PACKAGE. Do that by crawling the HTML page of
the directory containing its source tarball."
(let* ((uri (string->uri
(match (origin-uri (package-source package))
((? string? url) url)
((url _ ...) url))))
(custom (assoc-ref (package-properties package)
'release-monitoring-url))
(base (or custom
(string-append (symbol->string (uri-scheme uri))
"://" (uri-host uri))))
(directory (if custom
""
(dirname (uri-path uri))))
(package (package-upstream-name package)))
(catch #t
(lambda ()
(guard (c ((http-get-error? c) #f))
(latest-html-release package
#:base-url base
#:directory directory)))
(lambda (key . args)
;; Return false and move on upon connection failures and bogus HTTP
;; servers.
(unless (memq key '(gnutls-error tls-certificate-error
system-error
bad-header bad-header-component))
(apply throw key args))
#f))))
(define %gnu-updater
;; This is for everything at ftp.gnu.org.
(upstream-updater
@ -737,4 +788,11 @@ releases are on gnu.org."
(pred (url-prefix-predicate "mirror://kernel.org/"))
(latest latest-kernel.org-release)))
(define %generic-html-updater
(upstream-updater
(name 'generic-html)
(description "Updater that crawls HTML pages.")
(pred html-updatable-package?)
(latest latest-html-updatable-release)))
;;; gnu-maintenance.scm ends here