Vivianne Langdon
31012d5b8f
- We change the stream iterator to *always* return a grapheme (except for EOF). The grapheme then gets built up over time. - This way, trans flag for example is first white flag, then white flag + zwj, etc until it finally transforms into the trans flag. - Users of the stream library can then use the `modification?' flag to determine if the stream value is a modification of the prior grapheme instead of a new grapheme. - Abstracted iteration to an iterator object to support use cases where we don't have an input stream (reflow needs this!)
77 lines
1.7 KiB
Scheme
77 lines
1.7 KiB
Scheme
#!@GUILE@ --no-auto-compile
|
|
-*- scheme -*-
|
|
!#
|
|
|
|
;; Can be called with a trailing argument pointing to the file on disk.
|
|
|
|
(use-modules
|
|
(uniseg internal)
|
|
(ice-9 pretty-print)
|
|
(ice-9 peg)
|
|
(ice-9 format)
|
|
(ice-9 exceptions)
|
|
(ice-9 match)
|
|
(ice-9 hash-table)
|
|
(srfi srfi-1))
|
|
|
|
(define stdout (current-output-port))
|
|
|
|
(define url
|
|
"https://unicode.org/Public/13.0.0/ucd/EastAsianWidth.txt")
|
|
|
|
(define eastasian-ht (make-hash-table 6))
|
|
|
|
(define (string->property str comment)
|
|
(if (string-contains comment "COMBINING")
|
|
'combining
|
|
(match str
|
|
((or "W" "F") 'doublewidth)
|
|
("H" 'halfwidth)
|
|
("Na" 'narrow)
|
|
("N" 'neutral)
|
|
("A" 'ambiguous))))
|
|
|
|
(define eastasian-properties
|
|
'(combining
|
|
doublewidth
|
|
halfwidth
|
|
narrow
|
|
neutral
|
|
ambiguous))
|
|
|
|
(define eastasian-symbols
|
|
(map
|
|
(λ (prop) (symbol-with-prefix "char-set:eastasian-" prop))
|
|
eastasian-properties))
|
|
|
|
(define file "uniseg/charsets/eastasian.scm")
|
|
|
|
(format stdout "Writing to ~a...\n" file)
|
|
|
|
(with-output-to-file file
|
|
(λ ()
|
|
(format #t ";; Code generated by ~a. DO NOT EDIT\n\n" (basename (current-filename)))
|
|
|
|
(pretty-print
|
|
`(define-module (uniseg charsets eastasian)
|
|
#:use-module (uniseg internal)
|
|
#:use-module (ice-9 hash-table)
|
|
#:use-module (srfi srfi-1)
|
|
#:export (,@eastasian-symbols
|
|
eastasian-charsets)))
|
|
|
|
(define-values (process-line print-to-file)
|
|
(make-line-processor
|
|
eastasian-ht
|
|
string->property
|
|
eastasian-properties
|
|
eastasian-symbols
|
|
'eastasian-charsets
|
|
stdout))
|
|
|
|
(for-each process-line (cmdline-wget-or-file url stdout))
|
|
(print-to-file)
|
|
|
|
(display "Code generation complete.\n" stdout)))
|
|
|
|
(format stdout "Written to ~a.\n" file)
|