guile-uniseg/scripts/generate-eastasian.in
Vivianne Langdon 31012d5b8f Yet another reorganization, and solve Christine's 'rude problem'
- We change the stream iterator to *always* return a grapheme
  (except for EOF). The grapheme then gets built up over time.
  - This way, trans flag for example is first white flag, then
  white flag + zwj, etc until it finally transforms into the
  trans flag.
  - Users of the stream library can then use the `modification?'
  flag to determine if the stream value is a modification of the
  prior grapheme instead of a new grapheme.
  - Abstracted iteration to an iterator object to support use cases
  where we don't have an input stream (reflow needs this!)
2024-03-05 11:46:32 -05:00

77 lines
1.7 KiB
Scheme

#!@GUILE@ --no-auto-compile
-*- scheme -*-
!#
;; May be invoked with one trailing command-line argument: the path to a copy of
;; the data file on disk (presumably read in place of downloading `url').
(use-modules
(uniseg internal)
(ice-9 pretty-print)
(ice-9 peg)
(ice-9 format)
(ice-9 exceptions)
(ice-9 match)
(ice-9 hash-table)
(srfi srfi-1))
;; Port used for progress messages.  It is captured here, at load time, so
;; that it is still reachable inside the `with-output-to-file' thunk further
;; down, where the current output port is the generated file instead.
(define stdout (current-output-port))
;; Location of the Unicode 13.0.0 EastAsianWidth.txt data file.
(define url
"https://unicode.org/Public/13.0.0/ucd/EastAsianWidth.txt")
;; Hash table handed to `make-line-processor'.  The size hint of 6 equals the
;; number of entries in `eastasian-properties' -- presumably one table entry
;; per property; confirm against `make-line-processor' in (uniseg internal).
(define eastasian-ht (make-hash-table 6))
;; Translate one width class plus its trailing comment text into a property
;; symbol.
;;
;;   str      the width class as a string: "W", "F", "H", "Na", "N" or "A".
;;   comment  the comment text accompanying the entry; must be a string.
;;
;; Any entry whose comment contains "COMBINING" is classified as `combining',
;; whatever its width class is.  Otherwise "W" and "F" both give
;; `doublewidth', "H" gives `halfwidth', "Na" gives `narrow', "N" gives
;; `neutral' and "A" gives `ambiguous'.  Any other class string makes `match'
;; signal an error, since there is deliberately no catch-all clause.
(define (string->property str comment)
  (cond
   ;; The comment check takes precedence over the width class.
   ((string-contains comment "COMBINING") 'combining)
   (else
    (match str
      ("A" 'ambiguous)
      ("N" 'neutral)
      ("Na" 'narrow)
      ("H" 'halfwidth)
      ((or "F" "W") 'doublewidth)))))
;; The property symbols this script works with: exactly the six symbols that
;; `string->property' can return.  The list is passed to
;; `make-line-processor', and `eastasian-symbols' is derived from it with
;; `map', so the two lists share the same order.
(define eastasian-properties
'(combining
doublewidth
halfwidth
narrow
neutral
ambiguous))
;; One exported identifier per entry of `eastasian-properties', in the same
;; order.  Each is built by `symbol-with-prefix' (not defined in this file,
;; presumably from (uniseg internal)) with the prefix "char-set:eastasian-",
;; so the results are presumably symbols such as `char-set:eastasian-narrow'.
;; The list is spliced into the `#:export' clause of the generated module and
;; is also passed to `make-line-processor'.
(define eastasian-symbols
  (map (lambda (property)
         (symbol-with-prefix "char-set:eastasian-" property))
       eastasian-properties))
;; Path of the generated module, relative to the current working directory.
(define file "uniseg/charsets/eastasian.scm")

(format stdout "Writing to ~a...\n" file)

;; Inside the thunk the current output port is `file', so `(format #t ...)'
;; and `pretty-print' write into the generated module.  Progress messages are
;; sent explicitly to the saved `stdout' port instead.
(with-output-to-file file
  (lambda ()
    ;; Header line naming the script that produced the file.
    (format #t ";; Code generated by ~a. DO NOT EDIT\n\n"
            (basename (current-filename)))
    ;; Module declaration: exports every symbol in `eastasian-symbols' plus
    ;; `eastasian-charsets'.
    (pretty-print
     `(define-module (uniseg charsets eastasian)
        #:use-module (uniseg internal)
        #:use-module (ice-9 hash-table)
        #:use-module (srfi srfi-1)
        #:export (,@eastasian-symbols
                  eastasian-charsets)))
    ;; `make-line-processor' returns two procedures.  The first is applied to
    ;; every element returned by `cmdline-wget-or-file'; the second is then
    ;; called with no arguments (presumably it writes the accumulated
    ;; definitions to the current output port -- confirm in (uniseg internal)).
    (call-with-values
        (lambda ()
          (make-line-processor eastasian-ht
                               string->property
                               eastasian-properties
                               eastasian-symbols
                               'eastasian-charsets
                               stdout))
      (lambda (handle-line emit-definitions)
        (for-each handle-line (cmdline-wget-or-file url stdout))
        (emit-definitions)))
    (display "Code generation complete.\n" stdout)))

(format stdout "Written to ~a.\n" file)