#!@GUILE@ --no-auto-compile
-*- scheme -*-
!#
;; Code generator for the (uniseg charsets graphemes) module.
;;
;; Reads the Unicode GraphemeBreakProperty.txt data file and writes
;; "uniseg/charsets/graphemes.scm", a module exporting one charset
;; symbol per Grapheme_Cluster_Break property plus `grapheme-charsets'.
;;
;; Can be called with a trailing argument pointing to the file on disk.

(use-modules (uniseg internal)
             (ice-9 pretty-print)
             (ice-9 peg)
             (ice-9 format)
             (ice-9 exceptions)
             (ice-9 match)
             (ice-9 hash-table)
             (srfi srfi-1))

;; Captured before `with-output-to-file' rebinds the current output port,
;; so progress messages still reach the original output.
(define stdout (current-output-port))

(define url
  "https://www.unicode.org/Public/15.0.0/ucd/auxiliary/GraphemeBreakProperty.txt")

;; Sized for the 13 property values handled by `string->property'.
(define grapheme-ht (make-hash-table 13))

;; Every symbol that `string->property' can return must be listed here:
;; the exported charset symbols are derived from this list.
;; `hangul-syllable-t' was previously missing, so no
;; `char-set:grapheme-hangul-syllable-t' symbol was generated or exported
;; even though "T" is mapped to it below.
(define grapheme-properties
  '(hangul-syllable-l
    hangul-syllable-v
    hangul-syllable-t
    hangul-syllable-lv
    hangul-syllable-lvt
    prepend
    carriage-return
    line-feed
    control
    extend
    regional-indicator
    spacing-mark
    zero-width-joiner))

;; One `char-set:grapheme-<property>' symbol per property, in the same order.
(define grapheme-symbols
  (map (λ (prop)
         (symbol-with-prefix "char-set:grapheme-" prop))
       grapheme-properties))

;; Translate a property name as spelled in the data file (STR) into the
;; symbol used by this project.  COMMENT is accepted but unused here; it is
;; presumably part of the callback signature expected by
;; `make-line-processor' (defined in (uniseg internal)) -- TODO confirm.
;; There is no catch-all clause, so an unrecognised STR makes `match'
;; signal an error.
(define (string->property str comment)
  (match str
    ("L"                  'hangul-syllable-l)
    ("V"                  'hangul-syllable-v)
    ("T"                  'hangul-syllable-t)
    ("LV"                 'hangul-syllable-lv)
    ("LVT"                'hangul-syllable-lvt)
    ("Prepend"            'prepend)
    ("CR"                 'carriage-return)
    ("LF"                 'line-feed)
    ("Control"            'control)
    ("Extend"             'extend)
    ("Regional_Indicator" 'regional-indicator)
    ("SpacingMark"        'spacing-mark)
    ("ZWJ"                'zero-width-joiner)))

(define file "uniseg/charsets/graphemes.scm")

(format stdout "Writing to ~a...\n" file)

(with-output-to-file file
  (λ ()
    ;; Inside this thunk the current output port is FILE, so `format #t'
    ;; and `pretty-print' write into the generated module.
    (format #t ";; Code generated by ~a.  DO NOT EDIT\n\n"
            (basename (current-filename)))
    (pretty-print
     `(define-module (uniseg charsets graphemes)
        #:use-module (ice-9 hash-table)
        #:use-module (srfi srfi-1)
        #:use-module (uniseg internal)
        #:use-module (uniseg charsets emoji)
        #:export (,@grapheme-symbols
                  grapheme-charsets)))

    ;; `make-line-processor' comes from (uniseg internal); it yields a
    ;; per-line procedure and a thunk that emits the collected definitions.
    (define-values (process-line print-to-file)
      (make-line-processor grapheme-ht
                           string->property
                           grapheme-properties
                           grapheme-symbols
                           'grapheme-charsets
                           stdout))

    (for-each process-line (cmdline-wget-or-file url stdout))
    (print-to-file)

    ;; Need emoji in the set as well.
    (pretty-print
     `(set! grapheme-charsets
            (cons (list 'extended-pictographic
                        char-set:emoji-extended-pictographic)
                  grapheme-charsets)))
    (display "Code generation complete.\n" stdout)))

(format stdout "Written to ~a.\n" file)