swh: Add hooks for rate limiting handling.

* guix/swh.scm (%allow-request?, %save-rate-limit-reset-time)
(%general-rate-limit-reset-time): New variables.
(request-rate-limit-reached?, update-rate-limit-reset-time!): New
procedures.
(call): Call '%allow-request?'.  Change 'swh-error' protocol to pass
METHOD in addition to URL.
* tests/swh.scm ("rate limit reached")
("%allow-request? and request-rate-limit-reached?"): New tests.
This commit is contained in:
Ludovic Courtès 2019-08-29 15:59:16 +02:00
parent 9323ab550f
commit ba1c1853a7
No known key found for this signature in database
GPG Key ID: 090B11993D9AEBB5
2 changed files with 99 additions and 19 deletions

View File

@ -20,6 +20,7 @@
#:use-module (guix base16) #:use-module (guix base16)
#:use-module (guix build utils) #:use-module (guix build utils)
#:use-module ((guix build syscalls) #:select (mkdtemp!)) #:use-module ((guix build syscalls) #:select (mkdtemp!))
#:use-module (web uri)
#:use-module (web client) #:use-module (web client)
#:use-module (web response) #:use-module (web response)
#:use-module (json) #:use-module (json)
@ -32,6 +33,9 @@
#:use-module (ice-9 popen) #:use-module (ice-9 popen)
#:use-module ((ice-9 ftw) #:select (scandir)) #:use-module ((ice-9 ftw) #:select (scandir))
#:export (%swh-base-url #:export (%swh-base-url
%allow-request?
request-rate-limit-reached?
origin? origin?
origin-id origin-id
@ -196,31 +200,71 @@ Software Heritage."
((? string? str) str) ((? string? str) str)
((? null?) #f))) ((? null?) #f)))
(define %allow-request?
;; Takes a URL and method (e.g., the 'http-get' procedure) and returns true
;; to keep going. This can be used to disallow a requests when
;; 'request-rate-limit-reached?' returns true, for instance.
(make-parameter (const #t)))
;; The time when the rate limit for "/origin/save" POST requests and that of
;; other requests will be reset.
;; See <https://archive.softwareheritage.org/api/#rate-limiting>.
(define %save-rate-limit-reset-time 0)
(define %general-rate-limit-reset-time 0)
(define (request-rate-limit-reached? url method)
"Return true if the rate limit has been reached for URI."
(define uri
(string->uri url))
(define reset-time
(if (and (eq? method http-post)
(string-prefix? "/api/1/origin/save/" (uri-path uri)))
%save-rate-limit-reset-time
%general-rate-limit-reset-time))
(< (car (gettimeofday)) reset-time))
(define (update-rate-limit-reset-time! url method response)
"Update the rate limit reset time for URL and METHOD based on the headers in
RESPONSE."
(let ((uri (string->uri url)))
(match (assq-ref (response-headers response) 'x-ratelimit-reset)
((= string->number (? number? reset))
(if (and (eq? method http-post)
(string-prefix? "/api/1/origin/save/" (uri-path uri)))
(set! %save-rate-limit-reset-time reset)
(set! %general-rate-limit-reset-time reset)))
(_
#f))))
(define* (call url decode #:optional (method http-get) (define* (call url decode #:optional (method http-get)
#:key (false-if-404? #t)) #:key (false-if-404? #t))
"Invoke the endpoint at URL using METHOD. Decode the resulting JSON body "Invoke the endpoint at URL using METHOD. Decode the resulting JSON body
using DECODE, a one-argument procedure that takes an input port. When using DECODE, a one-argument procedure that takes an input port. When
FALSE-IF-404? is true, return #f upon 404 responses." FALSE-IF-404? is true, return #f upon 404 responses."
(let*-values (((response port) (and ((%allow-request?) url method)
(method url #:streaming? #t))) (let*-values (((response port)
;; See <https://archive.softwareheritage.org/api/#rate-limiting>. (method url #:streaming? #t)))
(match (assq-ref (response-headers response) 'x-ratelimit-remaining) ;; See <https://archive.softwareheritage.org/api/#rate-limiting>.
(#f #t) (match (assq-ref (response-headers response) 'x-ratelimit-remaining)
((? (compose zero? string->number)) (#f #t)
(throw 'swh-error url response)) ((? (compose zero? string->number))
(_ #t)) (update-rate-limit-reset-time! url method response)
(throw 'swh-error url method response))
(_ #t))
(cond ((= 200 (response-code response)) (cond ((= 200 (response-code response))
(let ((result (decode port))) (let ((result (decode port)))
(close-port port) (close-port port)
result)) result))
((and false-if-404? ((and false-if-404?
(= 404 (response-code response))) (= 404 (response-code response)))
(close-port port) (close-port port)
#f) #f)
(else (else
(close-port port) (close-port port)
(throw 'swh-error url response))))) (throw 'swh-error url method response))))))
(define-syntax define-query (define-syntax define-query
(syntax-rules (path) (syntax-rules (path)

View File

@ -19,6 +19,7 @@
(define-module (test-swh) (define-module (test-swh)
#:use-module (guix swh) #:use-module (guix swh)
#:use-module (guix tests http) #:use-module (guix tests http)
#:use-module (web response)
#:use-module (srfi srfi-64)) #:use-module (srfi srfi-64))
;; Test the JSON mapping machinery used in (guix swh). ;; Test the JSON mapping machinery used in (guix swh).
@ -68,6 +69,41 @@
(directory-entry-length entry))) (directory-entry-length entry)))
(lookup-directory "123")))) (lookup-directory "123"))))
(test-equal "rate limit reached"
3000000000
(let ((too-many (build-response
#:code 429
#:reason-phrase "Too many requests"
;; Pretend we've reached the limit and it'll be reset in
;; June 2065.
#:headers '((x-ratelimit-remaining . "0")
(x-ratelimit-reset . "3000000000")))))
(with-http-server `((,too-many "Too bad."))
(parameterize ((%swh-base-url (%local-url)))
(catch 'swh-error
(lambda ()
(lookup-origin "http://example.org/guix.git"))
(lambda (key url method response)
;; Ensure the reset time was recorded.
(@@ (guix swh) %general-rate-limit-reset-time)))))))
(test-assert "%allow-request? and request-rate-limit-reached?"
;; Here we test two things: that the rate limit set above is in effect and
;; that %ALLOW-REQUEST? is called, and that 'request-rate-limit-reached?'
;; returns true.
(let* ((key (gensym "skip-request"))
(skip-if-limit-reached
(lambda (url method)
(or (not (request-rate-limit-reached? url method))
(throw key #t)))))
(parameterize ((%allow-request? skip-if-limit-reached))
(catch key
(lambda ()
(lookup-origin "http://example.org/guix.git")
#f)
(const #t)))))
(test-end "swh") (test-end "swh")
;; Local Variables: ;; Local Variables: