105 lines
2.6 KiB
Scheme
105 lines
2.6 KiB
Scheme
#!@GUILE@ --no-auto-compile
|
|
-*- scheme -*-
|
|
!#
|
|
|
|
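;; Usage: pass the path of a local copy of emoji-data.txt as the single trailing
;; argument to read it from disk; with no argument the data is fetched from
;; `emoji-url' instead.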
|
|
|
|
(use-modules
|
|
(uniseg internal)
|
|
(ice-9 pretty-print)
|
|
(ice-9 peg)
|
|
(ice-9 format)
|
|
(ice-9 exceptions)
|
|
(ice-9 match)
|
|
(srfi srfi-1))
|
|
|
|
;; The output port that is current when the script is loaded.  Progress and
;; error messages name this port explicitly because `with-output-to-file'
;; rebinds the current output port to the generated file further down.
(define stdout (current-output-port))
|
|
|
|
;; Location of the Unicode 13.0.0 emoji data file.  It is only consulted when
;; no local file is supplied on the command line (see `line-func').
(define emoji-url "https://unicode.org/Public/13.0.0/ucd/emoji/emoji-data.txt")
|
|
|
|
;; Property name in the second field of a data line, e.g.
;; "Extended_Pictographic": zero or more ASCII letters or underscores.
;; Captured with `all', so the parse tree holds (@emoji-category "<name>").
(define-peg-pattern @emoji-category all
  (* (or (range #\a #\z)
         (range #\A #\Z)
         "_")))
|
|
|
|
;; The data part of a line: `<codepoint-range> ; <category>'.
;; The ";" separator is matched but kept out of the parse tree by `ignore'.
;; Declared `body', so the captures appear without an @emoji-datum tag.
;; NOTE(review): @codepoint-range and @ws are not defined in this file;
;; presumably they are exported by (uniseg internal) -- confirm.
(define-peg-pattern @emoji-datum body
  (and @codepoint-range
       (* @ws)
       (ignore ";")
       (* @ws)
       @emoji-category))
|
|
|
|
;; A complete data line: an @emoji-datum, optional @ws, then an @comment.
;; Declared `body', so the captures appear without an @emoji-line tag.
;; NOTE(review): @ws and @comment are not defined in this file; presumably
;; they are exported by (uniseg internal) -- confirm.
(define-peg-pattern @emoji-line body
  (and @emoji-datum
       (* @ws)
       @comment))
|
|
|
|
;; Accumulator filled by `process-emoji-line'.  Each element is a two-element
;; list (first last) holding the bounds of one code point range as returned by
;; `hex-string->integer'.  New entries are consed onto the front.
(define emoji-list '())
|
|
|
|
;; Parse one line of emoji data and, when it qualifies, record its code point
;; range in the global `emoji-list'.
;;
;; LINE is matched against @emoji-line.  Nothing happens when the parse tree is
;; #f, empty, or starts with the symbol @comment.  Otherwise the first and last
;; captured code points are converted with `hex-string->integer' and:
;;   - an error is raised if either bound satisfies `in-surrogate-range';
;;   - the pair (first last) is pushed onto `emoji-list' when the category is
;;     "Extended_Pictographic" and the last bound is greater than #xFF.
;; Errors raised during that step are reported on `stdout' and the line is
;; skipped.  The return value is not meaningful.
(define (process-emoji-line line)
  (define parsed (peg:tree (match-pattern @emoji-line line)))

  ;; Exception handler: tell the user on the terminal why the line is dropped.
  (define (report-skip err)
    (format stdout "Skipping line due to error :: ")
    (format-exception-msg stdout err))

  ;; Validate one parsed range and remember it when it qualifies.
  (define (remember-range! codepoints category)
    (let ((lo (hex-string->integer (first codepoints)))
          (hi (hex-string->integer (last codepoints))))
      (cond
       ;; `error' does not return, so the second clause is only reached for
       ;; ranges outside the surrogate block.
       ((or (in-surrogate-range lo)
            (in-surrogate-range hi))
        (error (format #f "chars in surrogate range ~x -> ~x" lo hi)))
       ((and (equal? "Extended_Pictographic" category)
             (> hi #xFF))
        (set! emoji-list (cons (list lo hi) emoji-list))))))

  (when (and parsed
             (not (null? parsed))
             (not (eq? '@comment (car parsed))))
    (match parsed
      ;; Expected shape: ((range category) comment).  The comment text is not
      ;; used, hence the wildcard.
      (((('@codepoint-range ('@codepoint codepoints) ...)
         ('@emoji-category category))
        ('@comment _))
       (with-exception-handler
           report-skip
         (lambda ()
           (remember-range! codepoints category))
         #:unwind? #t)))))
|
|
|
|
;; Thunk that yields the input lines, selected once at load time.
;; When the command line has exactly two elements (program name plus one
;; argument) the last element is handed to `file-to-lines'; in every other
;; case `emoji-url' is handed to `wget-to-lines'.  Both helpers also receive
;; `stdout'.
(define line-func
  (case (length (command-line))
    ((2)
     (lambda ()
       (file-to-lines (last (command-line)) stdout)))
    (else
     (lambda ()
       (wget-to-lines emoji-url stdout)))))
|
|
|
|
;; Path of the generated module.  It is relative, so it resolves against the
;; directory the script is run from.
(define file "uniseg/emoji.scm")
|
|
|
|
;; Announce the target on the terminal before output is redirected.
(format stdout "Writing to ~a...\n" file)

;; Inside this thunk the current output port is the generated file, so anything
;; meant for the terminal has to name `stdout' explicitly.
(with-output-to-file file
  (lambda ()
    (format #t ";; Code generated by ~a. DO NOT EDIT\n\n" (first (command-line)))

    ;; Fill `emoji-list' before it is embedded in the generated module below.
    (for-each process-emoji-line (line-func))

    ;; Module header of the generated file.
    (pretty-print
     '(define-module (uniseg emoji)
        #:use-module (srfi srfi-1)
        #:export (char-set:extended-pictographic)))

    ;; The collected ranges, emitted as a quoted literal.
    (pretty-print
     `(define emoji-list ',emoji-list))
    (newline)

    ;; The exported char-set starts out empty ...
    (pretty-print
     '(define char-set:extended-pictographic (char-set)))
    (newline)

    ;; ... and is extended, when the generated module loads, with every range.
    ;; The upper bound gets +1 because `ucs-range->char-set!' excludes it.
    (pretty-print
     '(for-each
       (λ (pair)
         (ucs-range->char-set!
          (first pair)
          (+ 1 (second pair))
          #t char-set:extended-pictographic))
       emoji-list))

    (format stdout "Code generation complete.\n")))

(format stdout "Written to ~a.\n" file)
|