cve: Use 'http-fetch/cached' instead of having custom caching.

That way CVE fetching benefits from 'If-Modified-Since' handling.

* guix/http-client.scm (http-fetch/cached): Add #:write-cache and
 #:cache-miss parameters and honor them.
* guix/cve.scm (%current-year-ttl, %past-year-ttl): Reduce.
(call-with-cve-port): Remove.
(write-cache): New procedure.
(fetch-vulnerabilities): Rewrite in terms of 'http-fetch/cached'.
This commit is contained in:
Ludovic Courtès 2017-11-15 10:23:38 +01:00
parent 866f37fb7e
commit 7482b98120
No known key found for this signature in database
GPG key ID: 090B11993D9AEBB5
2 changed files with 42 additions and 65 deletions

View file

@ -19,7 +19,6 @@
(define-module (guix cve) (define-module (guix cve)
#:use-module (guix utils) #:use-module (guix utils)
#:use-module (guix http-client) #:use-module (guix http-client)
#:use-module ((guix build utils) #:select (mkdir-p))
#:use-module (sxml ssax) #:use-module (sxml ssax)
#:use-module (web uri) #:use-module (web uri)
#:use-module (srfi srfi-1) #:use-module (srfi srfi-1)
@ -68,24 +67,11 @@ (define (yearly-feed-uri year)
(define %current-year-ttl (define %current-year-ttl
;; According to <https://nvd.nist.gov/download.cfm#CVE_FEED>, feeds are ;; According to <https://nvd.nist.gov/download.cfm#CVE_FEED>, feeds are
;; updated "approximately every two hours." ;; updated "approximately every two hours."
(* 3600 3)) (* 60 30))
(define %past-year-ttl (define %past-year-ttl
;; Update the previous year's database more and more infrequently. ;; Update the previous year's database more and more infrequently.
(* 3600 24 2 (date-month %now))) (* 3600 24 (date-month %now)))
(define (call-with-cve-port uri ttl proc)
  "Fetch URI with 'http-fetch', decompress the response as gzip, and call PROC
with the resulting input port.  The HTTP port is closed when the dynamic extent
of the call is left.  TTL is accepted but not used by this procedure."
  (let ((http-port (http-fetch uri)))
    (define (read-feed)
      ;; Hand PROC a fully buffered port over the decompressed stream.
      (call-with-decompressed-port 'gzip http-port
        (lambda (decompressed)
          (setvbuf decompressed _IOFBF 65536)
          (proc decompressed))))

    (define (release)
      ;; Exit guard: always close the underlying HTTP port.
      (close-port http-port))

    (dynamic-wind (const #t) read-feed release)))
(define %cpe-package-rx (define %cpe-package-rx
;; For applications: "cpe:/a:VENDOR:PACKAGE:VERSION", or sometimes ;; For applications: "cpe:/a:VENDOR:PACKAGE:VERSION", or sometimes
@ -194,40 +180,27 @@ (define sexp->vulnerability
(('v id (packages ...)) (('v id (packages ...))
(vulnerability id packages)))) (vulnerability id packages))))
(define (write-cache input cache)
  "Decompress the gzipped XML vulnerability data available from INPUT, convert
it with 'xml->vulnerabilities', and write the result to the CACHE port as a
compact sexp tagged with a format version."
  (call-with-decompressed-port 'gzip input
    (lambda (xml-port)
      ;; XXX: SSAX uses its "error port" for pointless warnings such as
      ;; "warning: Skipping PI".  Send them to a void port instead.
      (let* ((vulnerabilities
              (parameterize ((current-ssax-error-port (%make-void-port "w")))
                (xml->vulnerabilities xml-port)))
             (sexps (map vulnerability->sexp vulnerabilities)))
        (write `(vulnerabilities
                 1                                ;format version
                 ,sexps)
               cache)))))
(define (fetch-vulnerabilities year ttl) (define (fetch-vulnerabilities year ttl)
"Return the list of <vulnerability> for YEAR, assuming the on-disk cache has "Return the list of <vulnerability> for YEAR, assuming the on-disk cache has
the given TTL (fetch from the NIST web site when TTL has expired)." the given TTL (fetch from the NIST web site when TTL has expired)."
;; Note: We used to keep the original XML files in cache but parsing it (define (cache-miss uri)
;; would take typically ~15s for a year of data. Thus, we instead store a (format (current-error-port) "fetching CVE database for ~a...~%" year))
;; summarized version thereof as an sexp, which can be parsed in 1s or so.
(define cache
(string-append (cache-directory) "/cve/" (number->string year)))
(define (do-fetch)
(call-with-cve-port (yearly-feed-uri year) ttl
(lambda (port)
;; XXX: The SSAX "error port" is used to send pointless warnings such as
;; "warning: Skipping PI". Turn that off.
(format (current-error-port) "fetching CVE database for ~a...~%" year)
(parameterize ((current-ssax-error-port (%make-void-port "w")))
(xml->vulnerabilities port)))))
(define (update-cache)
(mkdir-p (dirname cache))
(let ((vulns (do-fetch)))
(with-atomic-file-output cache
(lambda (port)
(write `(vulnerabilities
1 ;format version
,(map vulnerability->sexp vulns))
port)))
vulns))
(define (old? file)
;; Return true if FILE was last modified more than TTL seconds ago.
(let* ((s (stat file))
(now (current-time time-utc)))
(< (+ (stat:mtime s) ttl) (time-second now))))
(define (read* port) (define (read* port)
;; Disable read options to avoid populating the source property weak ;; Disable read options to avoid populating the source property weak
@ -242,17 +215,18 @@ (define (read* port)
(lambda () (lambda ()
(read-options options))))) (read-options options)))))
(catch 'system-error ;; Note: We used to keep the original XML files in cache but parsing it
(lambda () ;; would take typically ~15s for a year of data. Thus, we instead store a
(if (old? cache) ;; summarized version thereof as an sexp, which can be parsed in 1s or so.
(update-cache) (let* ((port (http-fetch/cached (yearly-feed-uri year)
(match (call-with-input-file cache read*) #:ttl ttl
(('vulnerabilities 1 vulns) #:write-cache write-cache
(map sexp->vulnerability vulns)) #:cache-miss cache-miss))
(x (sexp (read* port)))
(update-cache))))) (close-port port)
(lambda args (match sexp
(update-cache)))) (('vulnerabilities 1 vulns)
(map sexp->vulnerability vulns)))))
(define (current-vulnerabilities) (define (current-vulnerabilities)
"Return the current list of Common Vulnerabilities and Exposures (CVE) as "Return the current list of Common Vulnerabilities and Exposures (CVE) as
@ -307,8 +281,4 @@ (define table
package table))) package table)))
;;; Local Variables:
;;; eval: (put 'call-with-cve-port 'scheme-indent-function 2)
;;; End:
;;; cve.scm ends here ;;; cve.scm ends here

View file

@ -302,9 +302,15 @@ (define (cache-file-for-uri uri)
(base64-encode digest 0 (bytevector-length digest) (base64-encode digest 0 (bytevector-length digest)
#f #f base64url-alphabet)))) #f #f base64url-alphabet))))
(define* (http-fetch/cached uri #:key (ttl (%http-cache-ttl)) text?) (define* (http-fetch/cached uri #:key (ttl (%http-cache-ttl)) text?
(write-cache dump-port)
(cache-miss (const #t)))
"Like 'http-fetch', return an input port, but cache its contents in "Like 'http-fetch', return an input port, but cache its contents in
~/.cache/guix. The cache remains valid for TTL seconds." ~/.cache/guix. The cache remains valid for TTL seconds.
Call WRITE-CACHE with the HTTP input port and the cache output port to write
the data to cache. Call CACHE-MISS with URI just before fetching data from
URI."
(let ((file (cache-file-for-uri uri))) (let ((file (cache-file-for-uri uri)))
(define (update-cache cache-port) (define (update-cache cache-port)
(define cache-time (define cache-time
@ -327,11 +333,12 @@ (define headers
(raise c)))) (raise c))))
(let ((port (http-fetch uri #:text? text? (let ((port (http-fetch uri #:text? text?
#:headers headers))) #:headers headers)))
(cache-miss uri)
(mkdir-p (dirname file)) (mkdir-p (dirname file))
(when cache-port (when cache-port
(close-port cache-port)) (close-port cache-port))
(with-atomic-file-output file (with-atomic-file-output file
(cut dump-port port <>)) (cut write-cache port <>))
(close-port port) (close-port port)
(open-input-file file)))) (open-input-file file))))