165 lines
4.1 KiB
Scheme
165 lines
4.1 KiB
Scheme
#!@GUILE@ --no-auto-compile
|
|
-*- scheme -*-
|
|
!#
|
|
|
|
;; Can be called with a trailing argument pointing to the file on disk.
|
|
|
|
(use-modules
|
|
(runewidth internal)
|
|
(ice-9 pretty-print)
|
|
(ice-9 peg)
|
|
(ice-9 format)
|
|
(ice-9 exceptions)
|
|
(ice-9 match)
|
|
(ice-9 hash-table)
|
|
(srfi srfi-1))
|
|
|
|
(define stdout (current-output-port))
|
|
|
|
(define east-asian-url
|
|
"https://unicode.org/Public/13.0.0/ucd/EastAsianWidth.txt")
|
|
|
|
(define-peg-pattern @ea-width-prop all
|
|
(or "A" "F" "H" "Na" "N" "W"))
|
|
|
|
(define-peg-pattern @ea-datum body
|
|
(and @codepoint-range (* @ws) (ignore ";") (* @ws) @ea-width-prop))
|
|
|
|
(define-peg-pattern @ea-line body
|
|
(and @ea-datum (* @ws) @comment))
|
|
|
|
(define ea-chars-ht (make-hash-table 6))
|
|
|
|
(define (process-east-asian-line line)
|
|
(define (cons-ht! key low high)
|
|
(let* ((old (hashq-ref ea-chars-ht key))
|
|
(value (list low high))
|
|
(new-lst
|
|
(if old
|
|
(cons value old)
|
|
(list value))))
|
|
(hashq-set! ea-chars-ht key new-lst)))
|
|
|
|
(define tree (peg:tree (match-pattern @ea-line line)))
|
|
|
|
(unless (or (not tree)
|
|
(null? tree)
|
|
(eq? '@comment (car tree)))
|
|
|
|
(match tree
|
|
(((('@codepoint-range
|
|
('@codepoint codepoints) ...)
|
|
('@ea-width-prop width-prop))
|
|
('@comment comment))
|
|
|
|
(with-exception-handler
|
|
(λ (err)
|
|
(format stdout "Skipping line due to error :: ")
|
|
(format-exception-msg stdout err))
|
|
(λ ()
|
|
(let ((f (hex-string->integer (first codepoints)))
|
|
(l (hex-string->integer (last codepoints))))
|
|
|
|
(when (or (in-surrogate-range f)
|
|
(in-surrogate-range l))
|
|
(error (format #f "chars in surrogate range ~x -> ~x" f l)))
|
|
|
|
(if (string-contains comment "COMBINING")
|
|
(cons-ht! 'combining f l)
|
|
(match width-prop
|
|
((or "W" "F")
|
|
(cons-ht! 'doublewidth f l))
|
|
("H"
|
|
(cons-ht! 'halfwidth f l))
|
|
("Na"
|
|
(cons-ht! 'narrow f l))
|
|
("N"
|
|
(cons-ht! 'neutral f l))
|
|
("A"
|
|
(cons-ht! 'ambiguous f l))))))
|
|
#:unwind? #t)))))
|
|
|
|
(define ea-sets
|
|
'(combining
|
|
doublewidth
|
|
halfwidth
|
|
narrow
|
|
neutral
|
|
ambiguous))
|
|
|
|
(define ea-symbol-names
|
|
(map
|
|
(λ (set)
|
|
(string->symbol
|
|
(string-concatenate
|
|
(list "char-set:eastasian-"
|
|
(symbol->string set)))))
|
|
ea-sets))
|
|
|
|
(define ea-sets-and-symbols
|
|
(zip ea-sets ea-symbol-names))
|
|
|
|
|
|
(define line-func
|
|
(if (= 2 (length (command-line)))
|
|
(λ ()
|
|
(file-to-lines (last (command-line)) stdout))
|
|
(λ ()
|
|
(wget-to-lines east-asian-url stdout))))
|
|
|
|
(define file "runewidth/eastasian.scm")
|
|
|
|
(format stdout "Writing to ~a...\n" file)
|
|
|
|
(with-output-to-file file
|
|
(λ ()
|
|
(display ";; Code generated by script/generate. DO NOT EDIT\n\n")
|
|
|
|
(for-each process-east-asian-line (line-func))
|
|
|
|
(pretty-print
|
|
`(define-module (runewidth eastasian)
|
|
#:use-module (ice-9 hash-table)
|
|
#:use-module (srfi srfi-1)
|
|
#:export
|
|
,ea-symbol-names))
|
|
|
|
(pretty-print
|
|
`(define chars-ht
|
|
(alist->hashq-table ',(hash-map->list cons ea-chars-ht))))
|
|
|
|
(display "\n")
|
|
|
|
(pretty-print
|
|
`(define-syntax-rule (ranges->charset! name symbol)
|
|
(let* ((pairs (hashq-ref chars-ht name)))
|
|
(for-each
|
|
(λ (pair)
|
|
(ucs-range->char-set!
|
|
(first pair)
|
|
;; Exclusive upper range, so add one
|
|
(+ (second pair) 1)
|
|
#t symbol))
|
|
pairs))))
|
|
|
|
(display "\n")
|
|
|
|
(for-each
|
|
(λ (sym)
|
|
(pretty-print
|
|
`(define ,sym (char-set))))
|
|
ea-symbol-names)
|
|
|
|
(display "\n")
|
|
|
|
(for-each
|
|
(λ (set-pair)
|
|
(let ((name (first set-pair))
|
|
(symbol (second set-pair)))
|
|
(pretty-print
|
|
`(ranges->charset! ',name ,symbol))))
|
|
ea-sets-and-symbols)
|
|
|
|
(display "Code generation complete.\n" stdout)))
|
|
|
|
(format stdout "Written to ~a.\n" file)
|