#!@GUILE@ --no-auto-compile -*- scheme -*- !# ;; Can be called with a trailing argument pointing to the file on disk. (use-modules (runewidth internal) (ice-9 pretty-print) (ice-9 peg) (ice-9 format) (ice-9 exceptions) (ice-9 match) (ice-9 hash-table) (srfi srfi-1)) (define stdout (current-output-port)) (define east-asian-url "https://unicode.org/Public/13.0.0/ucd/EastAsianWidth.txt") (define-peg-pattern @ea-width-prop all (or "A" "F" "H" "Na" "N" "W")) (define-peg-pattern @ea-datum body (and @codepoint-range (* @ws) (ignore ";") (* @ws) @ea-width-prop)) (define-peg-pattern @ea-line body (and @ea-datum (* @ws) @comment)) (define ea-chars-ht (make-hash-table 6)) (define (process-east-asian-line line) (define (cons-ht! key low high) (let* ((old (hashq-ref ea-chars-ht key)) (value (list low high)) (new-lst (if old (cons value old) (list value)))) (hashq-set! ea-chars-ht key new-lst))) (define tree (peg:tree (match-pattern @ea-line line))) (unless (or (not tree) (null? tree) (eq? '@comment (car tree))) (match tree (((('@codepoint-range ('@codepoint codepoints) ...) ('@ea-width-prop width-prop)) ('@comment comment)) (with-exception-handler (λ (err) (format stdout "Skipping line due to error :: ") (format-exception-msg stdout err)) (λ () (let ((f (hex-string->integer (first codepoints))) (l (hex-string->integer (last codepoints)))) (when (or (in-surrogate-range f) (in-surrogate-range l)) (error (format #f "chars in surrogate range ~x -> ~x" f l))) (if (string-contains comment "COMBINING") (cons-ht! 'combining f l) (match width-prop ((or "W" "F") (cons-ht! 'doublewidth f l)) ("H" (cons-ht! 'halfwidth f l)) ("Na" (cons-ht! 'narrow f l)) ("N" (cons-ht! 'neutral f l)) ("A" (cons-ht! 'ambiguous f l)))))) #:unwind? #t))))) (define ea-sets '(combining doublewidth halfwidth narrow neutral ambiguous)) (define ea-symbol-names (map (λ (set) (string->symbol (string-concatenate (list "char-set:eastasian-" (symbol->string set))))) ea-sets)) (define ea-sets-and-symbols (zip ea-sets ea-symbol-names)) (define line-func (if (= 2 (length (command-line))) (λ () (file-to-lines (last (command-line)) stdout)) (λ () (wget-to-lines east-asian-url stdout)))) (define file "runewidth/eastasian.scm") (format stdout "Writing to ~a...\n" file) (with-output-to-file file (λ () (display ";; Code generated by script/generate. DO NOT EDIT\n\n") (for-each process-east-asian-line (line-func)) (pretty-print `(define-module (runewidth eastasian) #:use-module (ice-9 hash-table) #:use-module (srfi srfi-1) #:export ,ea-symbol-names)) (pretty-print `(define chars-ht (alist->hashq-table ',(hash-map->list cons ea-chars-ht)))) (display "\n") (pretty-print `(define-syntax-rule (ranges->charset! name symbol) (let* ((pairs (hashq-ref chars-ht name))) (for-each (λ (pair) (ucs-range->char-set! (first pair) ;; Exclusive upper range, so add one (+ (second pair) 1) #t symbol)) pairs)))) (display "\n") (for-each (λ (sym) (pretty-print `(define ,sym (char-set)))) ea-symbol-names) (display "\n") (for-each (λ (set-pair) (let ((name (first set-pair)) (symbol (second set-pair))) (pretty-print `(ranges->charset! ',name ,symbol)))) ea-sets-and-symbols) (display "Code generation complete.\n" stdout))) (format stdout "Written to ~a.\n" file)