#!@GUILE@ --no-auto-compile -*- scheme -*- !# ;; Can be called with a trailing argument pointing to the file on disk. (use-modules (uniseg internal) (ice-9 pretty-print) (ice-9 peg) (ice-9 format) (ice-9 exceptions) (ice-9 match) (srfi srfi-1)) (define stdout (current-output-port)) (define emoji-url "https://unicode.org/Public/13.0.0/ucd/emoji/emoji-data.txt") (define-peg-pattern @emoji-category all (* (peg "[a-zA-Z_]"))) (define-peg-pattern @emoji-datum body (and @codepoint-range (* @ws) (ignore ";") (* @ws) @emoji-category)) (define-peg-pattern @emoji-line body (and @emoji-datum (* @ws) @comment)) (define emoji-list '()) (define (process-emoji-line line) (define tree (peg:tree (match-pattern @emoji-line line))) (unless (or (not tree) (null? tree) (eq? '@comment (car tree))) (match tree (((('@codepoint-range ('@codepoint codepoints) ...) ('@emoji-category category)) ('@comment comment)) (with-exception-handler (λ (err) (format stdout "Skipping line due to error :: ") (format-exception-msg stdout err)) (λ () (let ((f (hex-string->integer (first codepoints))) (l (hex-string->integer (last codepoints)))) (when (or (in-surrogate-range f) (in-surrogate-range l)) (error (format #f "chars in surrogate range ~x -> ~x" f l))) (when (and (equal? "Extended_Pictographic" category) (> l #xFF)) (set! emoji-list (cons (list f l) emoji-list))))) #:unwind? #t))))) (define line-func (if (= 2 (length (command-line))) (λ () (file-to-lines (last (command-line)) stdout)) (λ () (wget-to-lines emoji-url stdout)))) (define file "uniseg/emoji.scm") (format stdout "Writing to ~a...\n" file) (with-output-to-file file (λ () (format #t ";; Code generated by ~a. DO NOT EDIT\n\n" (first (command-line))) (for-each process-emoji-line (line-func)) (pretty-print `(define-module (uniseg emoji) #:use-module (srfi srfi-1) #:export (char-set:extended-pictographic))) (pretty-print `(define emoji-list ',emoji-list)) (display "\n") (pretty-print `(define char-set:extended-pictographic (char-set))) (display "\n") (pretty-print `(for-each (λ (pair) (ucs-range->char-set! (first pair) (+ 1 (second pair)) #t char-set:extended-pictographic)) emoji-list)) (display "Code generation complete.\n" stdout))) (format stdout "Written to ~a.\n" file)