Cleanups and reducing duplicates

This commit is contained in:
Vivianne 2024-03-04 14:27:12 -05:00
parent 92dc5a8908
commit a03c6d2348
9 changed files with 4350 additions and 4401 deletions

View File

@ -80,7 +80,7 @@
"GUILE_LOAD_COMPILED_PATH"
(compiled-dir out version)
(compiled-dir "" version))))
,''("generate-east-asian"
,''("generate-eastasian"
"generate-emoji"
"generate-graphemes"))
#t))))))))

View File

@ -36,7 +36,7 @@
(programs
((directory
"scripts"
((in-file "generate-east-asian")
((in-file "generate-eastasian")
(in-file "generate-emoji")
(in-file "generate-graphemes")))))
(documentation

View File

@ -16,7 +16,7 @@
(define stdout (current-output-port))
(define east-asian-url
(define url
"https://unicode.org/Public/13.0.0/ucd/EastAsianWidth.txt")
(define-peg-pattern @ea-width-prop all
@ -30,7 +30,7 @@
(define eastasian-ht (make-hash-table 6))
(define (process-east-asian-line line)
(define (process-line line)
(define (string->property str comment)
(if (string-contains comment "COMBINING")
'combining
@ -90,22 +90,15 @@
(zip ea-sets ea-symbol-names))
(define line-func
(if (= 2 (length (command-line)))
(λ ()
(file-to-lines (last (command-line)) stdout))
(λ ()
(wget-to-lines east-asian-url stdout))))
(define file "uniseg/eastasian.scm")
(format stdout "Writing to ~a...\n" file)
(with-output-to-file file
(λ ()
(format #t ";; Code generated by ~a. DO NOT EDIT\n\n" (first (command-line)))
(format #t ";; Code generated by ~a. DO NOT EDIT\n\n" (basename (current-filename)))
(for-each process-east-asian-line (line-func))
(for-each process-line (cmdline-wget-or-file url stdout))
(pretty-print
`(define-module (uniseg eastasian)
@ -152,4 +145,3 @@
(display "Code generation complete.\n" stdout)))
(format stdout "Written to ~a.\n" file)

View File

@ -15,7 +15,7 @@
(define stdout (current-output-port))
(define emoji-url
(define url
"https://unicode.org/Public/13.0.0/ucd/emoji/emoji-data.txt")
(define-peg-pattern @emoji-category all
@ -49,7 +49,7 @@
(define emoji-sets-and-symbols
(zip emoji-sets emoji-symbol-names))
(define (process-emoji-line line)
(define (process-line line)
(define (string->category str)
(match str
("Emoji" 'emoji)
@ -87,22 +87,15 @@
(cons-hash-list! emoji-ht category f l)))
#:unwind? #t)))))
(define line-func
(if (= 2 (length (command-line)))
(λ ()
(file-to-lines (last (command-line)) stdout))
(λ ()
(wget-to-lines emoji-url stdout))))
(define file "uniseg/emoji.scm")
(format stdout "Writing to ~a...\n" file)
(with-output-to-file file
(λ ()
(format #t ";; Code generated by ~a. DO NOT EDIT\n\n" (first (command-line)))
(format #t ";; Code generated by ~a. DO NOT EDIT\n\n" (basename (current-filename)))
(for-each process-emoji-line (line-func))
(for-each process-line (cmdline-wget-or-file url stdout))
(pretty-print
`(define-module (uniseg emoji)

View File

@ -16,7 +16,7 @@
(define stdout (current-output-port))
(define grapheme-url
(define url
"https://www.unicode.org/Public/15.0.0/ucd/auxiliary/GraphemeBreakProperty.txt")
(define-peg-pattern @grapheme-category all
@ -56,7 +56,7 @@
(define grapheme-sets-and-symbols
(zip grapheme-sets grapheme-symbol-names))
(define (process-grapheme-line line)
(define (process-line line)
(define (string->category str)
(match str
("L" 'hangul-syllable-l)
@ -101,12 +101,6 @@
(cons-hash-list! grapheme-ht category f l)))
#:unwind? #t)))))
(define line-func
(if (= 2 (length (command-line)))
(λ ()
(file-to-lines (last (command-line)) stdout))
(λ ()
(wget-to-lines grapheme-url stdout))))
(define file "uniseg/graphemes.scm")
@ -114,9 +108,9 @@
(with-output-to-file file
(λ ()
(format #t ";; Code generated by ~a. DO NOT EDIT\n\n" (first (command-line)))
(format #t ";; Code generated by ~a. DO NOT EDIT\n\n" (basename (current-filename)))
(for-each process-grapheme-line (line-func))
(for-each process-line (cmdline-wget-or-file url stdout))
(pretty-print
`(define-module (uniseg graphemes)

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -20,8 +20,7 @@
hex-string->integer
format-exception-msg
in-surrogate-range
wget-to-lines
file-to-lines
cmdline-wget-or-file
ranges->charset!))
;;
@ -84,16 +83,22 @@
(and (>= num #xd800)
(<= num #xdfff)))
(define* (wget-to-lines url #:optional (port #f))
(format port "Downloading from ~a..." url)
(define (cmdline-wget-or-file url log-port)
(let ((cmdline (command-line)))
(if (= 2 (length cmdline))
(file-to-lines (last cmdline) log-port)
(wget-to-lines url log-port))))
(define (wget-to-lines url log-port)
(format log-port "Downloading from ~a..." url)
(let* ((response body (http-get (string->uri url)))
(lines (string-split body #\newline)))
(format port " done.\n")
(format log-port " done.\n")
lines))
(define* (file-to-lines path #:optional (port #f))
(format port "Loading from local file ~a\n" path)
(define (file-to-lines path log-port)
(format log-port "Loading from local file ~a\n" path)
(string-split
(with-input-from-file path
(λ () (get-string-all (current-input-port)))) #\newline))