guile-uniseg/uniseg/internal.scm

99 lines
2.7 KiB
Scheme
Raw Normal View History

2024-03-03 18:13:22 +00:00
(define-module (uniseg internal)
#:use-module (ice-9 peg)
#:use-module (ice-9 textual-ports)
#:use-module (ice-9 exceptions)
#:use-module (ice-9 i18n)
#:use-module (web uri)
#:use-module (web client)
#:use-module (web request)
#:use-module (srfi srfi-1)
#:use-module (srfi srfi-71)
#:export (@hex
@codepoint
@codepoint-range
@comment
@ws
cons-hash-list!
hex-string->integer
format-exception-msg
in-surrogate-range
wget-to-lines
file-to-lines
ranges->charset!))
2024-03-04 01:25:47 +00:00
;;
;; Common PEG patterns
;;
(define-peg-pattern @hex body (peg "[a-fA-F0-9]"))
(define-peg-pattern @codepoint all
(* @hex))
(define-peg-pattern @codepoint-range all
(or
(and @codepoint (ignore "..") @codepoint)
@codepoint))
(define-peg-pattern @comment all
(and (ignore "#") (* peg-any)))
(define-peg-pattern @ws none
(or " " "\t"))
2024-03-04 01:25:47 +00:00
;; Helper macro to add a list of character ranges
;; to a hash-set.
(define-syntax-rule (cons-hash-list! ht key low high)
(let* ((old (hashq-ref ht key))
(value (list low high))
(new-lst
(if old
(cons value old)
(list value))))
(hashq-set! ht key new-lst)))
2024-03-04 01:25:47 +00:00
;; Helper macro to take above character range hash table
;; and insert it into Guile's chasracter set system.
(define-syntax-rule (ranges->charset! ht name symbol)
(let* ((pairs (hashq-ref ht name)))
(for-each
(λ (pair)
(ucs-range->char-set!
(first pair)
;; Exclusive upper range, so add one
(+ (second pair) 1)
#t symbol))
pairs)))
(define (hex-string->integer str)
;; XXX: We would ideally do integer->char here and save it to file as such
;; However read-expr* does not actually work for all the characters!
;; So they can't be written out as such.
(locale-string->integer str 16))
2024-03-04 01:25:47 +00:00
;; This should be in the standard library...
(define (format-exception-msg port err)
(apply format port (exception-message err) (exception-irritants err))
(display "\n" port))
2024-03-04 01:25:47 +00:00
;; Little checker to see if we are in the danger zone
;; (surrogate range is invalid unicode and guile errors)
(define (in-surrogate-range num)
(and (>= num #xd800)
(<= num #xdfff)))
(define* (wget-to-lines url #:optional (port #f))
(format port "Downloading from ~a..." url)
(let* ((response body (http-get (string->uri url)))
(lines (string-split body #\newline)))
(format port " done.\n")
lines))
(define* (file-to-lines path #:optional (port #f))
(format port "Loading from local file ~a\n" path)
(string-split
(with-input-from-file path
(λ () (get-string-all (current-input-port)))) #\newline))