Cleanups and reducing duplicates

This commit is contained in:
Vivianne 2024-03-04 14:27:12 -05:00
parent 92dc5a8908
commit a03c6d2348
9 changed files with 4350 additions and 4401 deletions

View File

@ -80,7 +80,7 @@
"GUILE_LOAD_COMPILED_PATH" "GUILE_LOAD_COMPILED_PATH"
(compiled-dir out version) (compiled-dir out version)
(compiled-dir "" version)))) (compiled-dir "" version))))
,''("generate-east-asian" ,''("generate-eastasian"
"generate-emoji" "generate-emoji"
"generate-graphemes")) "generate-graphemes"))
#t)))))))) #t))))))))

View File

@ -36,7 +36,7 @@
(programs (programs
((directory ((directory
"scripts" "scripts"
((in-file "generate-east-asian") ((in-file "generate-eastasian")
(in-file "generate-emoji") (in-file "generate-emoji")
(in-file "generate-graphemes"))))) (in-file "generate-graphemes")))))
(documentation (documentation

View File

@ -16,7 +16,7 @@
(define stdout (current-output-port)) (define stdout (current-output-port))
(define east-asian-url (define url
"https://unicode.org/Public/13.0.0/ucd/EastAsianWidth.txt") "https://unicode.org/Public/13.0.0/ucd/EastAsianWidth.txt")
(define-peg-pattern @ea-width-prop all (define-peg-pattern @ea-width-prop all
@ -30,7 +30,7 @@
(define eastasian-ht (make-hash-table 6)) (define eastasian-ht (make-hash-table 6))
(define (process-east-asian-line line) (define (process-line line)
(define (string->property str comment) (define (string->property str comment)
(if (string-contains comment "COMBINING") (if (string-contains comment "COMBINING")
'combining 'combining
@ -90,22 +90,15 @@
(zip ea-sets ea-symbol-names)) (zip ea-sets ea-symbol-names))
(define line-func
(if (= 2 (length (command-line)))
(λ ()
(file-to-lines (last (command-line)) stdout))
(λ ()
(wget-to-lines east-asian-url stdout))))
(define file "uniseg/eastasian.scm") (define file "uniseg/eastasian.scm")
(format stdout "Writing to ~a...\n" file) (format stdout "Writing to ~a...\n" file)
(with-output-to-file file (with-output-to-file file
(λ () (λ ()
(format #t ";; Code generated by ~a. DO NOT EDIT\n\n" (first (command-line))) (format #t ";; Code generated by ~a. DO NOT EDIT\n\n" (basename (current-filename)))
(for-each process-east-asian-line (line-func)) (for-each process-line (cmdline-wget-or-file url stdout))
(pretty-print (pretty-print
`(define-module (uniseg eastasian) `(define-module (uniseg eastasian)
@ -152,4 +145,3 @@
(display "Code generation complete.\n" stdout))) (display "Code generation complete.\n" stdout)))
(format stdout "Written to ~a.\n" file) (format stdout "Written to ~a.\n" file)

View File

@ -15,7 +15,7 @@
(define stdout (current-output-port)) (define stdout (current-output-port))
(define emoji-url (define url
"https://unicode.org/Public/13.0.0/ucd/emoji/emoji-data.txt") "https://unicode.org/Public/13.0.0/ucd/emoji/emoji-data.txt")
(define-peg-pattern @emoji-category all (define-peg-pattern @emoji-category all
@ -49,7 +49,7 @@
(define emoji-sets-and-symbols (define emoji-sets-and-symbols
(zip emoji-sets emoji-symbol-names)) (zip emoji-sets emoji-symbol-names))
(define (process-emoji-line line) (define (process-line line)
(define (string->category str) (define (string->category str)
(match str (match str
("Emoji" 'emoji) ("Emoji" 'emoji)
@ -87,22 +87,15 @@
(cons-hash-list! emoji-ht category f l))) (cons-hash-list! emoji-ht category f l)))
#:unwind? #t))))) #:unwind? #t)))))
(define line-func
(if (= 2 (length (command-line)))
(λ ()
(file-to-lines (last (command-line)) stdout))
(λ ()
(wget-to-lines emoji-url stdout))))
(define file "uniseg/emoji.scm") (define file "uniseg/emoji.scm")
(format stdout "Writing to ~a...\n" file) (format stdout "Writing to ~a...\n" file)
(with-output-to-file file (with-output-to-file file
(λ () (λ ()
(format #t ";; Code generated by ~a. DO NOT EDIT\n\n" (first (command-line))) (format #t ";; Code generated by ~a. DO NOT EDIT\n\n" (basename (current-filename)))
(for-each process-emoji-line (line-func)) (for-each process-line (cmdline-wget-or-file url stdout))
(pretty-print (pretty-print
`(define-module (uniseg emoji) `(define-module (uniseg emoji)

View File

@ -16,7 +16,7 @@
(define stdout (current-output-port)) (define stdout (current-output-port))
(define grapheme-url (define url
"https://www.unicode.org/Public/15.0.0/ucd/auxiliary/GraphemeBreakProperty.txt") "https://www.unicode.org/Public/15.0.0/ucd/auxiliary/GraphemeBreakProperty.txt")
(define-peg-pattern @grapheme-category all (define-peg-pattern @grapheme-category all
@ -56,7 +56,7 @@
(define grapheme-sets-and-symbols (define grapheme-sets-and-symbols
(zip grapheme-sets grapheme-symbol-names)) (zip grapheme-sets grapheme-symbol-names))
(define (process-grapheme-line line) (define (process-line line)
(define (string->category str) (define (string->category str)
(match str (match str
("L" 'hangul-syllable-l) ("L" 'hangul-syllable-l)
@ -101,12 +101,6 @@
(cons-hash-list! grapheme-ht category f l))) (cons-hash-list! grapheme-ht category f l)))
#:unwind? #t))))) #:unwind? #t)))))
(define line-func
(if (= 2 (length (command-line)))
(λ ()
(file-to-lines (last (command-line)) stdout))
(λ ()
(wget-to-lines grapheme-url stdout))))
(define file "uniseg/graphemes.scm") (define file "uniseg/graphemes.scm")
@ -114,9 +108,9 @@
(with-output-to-file file (with-output-to-file file
(λ () (λ ()
(format #t ";; Code generated by ~a. DO NOT EDIT\n\n" (first (command-line))) (format #t ";; Code generated by ~a. DO NOT EDIT\n\n" (basename (current-filename)))
(for-each process-grapheme-line (line-func)) (for-each process-line (cmdline-wget-or-file url stdout))
(pretty-print (pretty-print
`(define-module (uniseg graphemes) `(define-module (uniseg graphemes)

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -20,8 +20,7 @@
hex-string->integer hex-string->integer
format-exception-msg format-exception-msg
in-surrogate-range in-surrogate-range
wget-to-lines cmdline-wget-or-file
file-to-lines
ranges->charset!)) ranges->charset!))
;; ;;
@ -84,16 +83,22 @@
(and (>= num #xd800) (and (>= num #xd800)
(<= num #xdfff))) (<= num #xdfff)))
(define* (wget-to-lines url #:optional (port #f)) (define (cmdline-wget-or-file url log-port)
(format port "Downloading from ~a..." url) (let ((cmdline (command-line)))
(if (= 2 (length cmdline))
(file-to-lines (last cmdline) log-port)
(wget-to-lines url log-port))))
(define (wget-to-lines url log-port)
(format log-port "Downloading from ~a..." url)
(let* ((response body (http-get (string->uri url))) (let* ((response body (http-get (string->uri url)))
(lines (string-split body #\newline))) (lines (string-split body #\newline)))
(format port " done.\n") (format log-port " done.\n")
lines)) lines))
(define* (file-to-lines path #:optional (port #f)) (define (file-to-lines path log-port)
(format port "Loading from local file ~a\n" path) (format log-port "Loading from local file ~a\n" path)
(string-split (string-split
(with-input-from-file path (with-input-from-file path
(λ () (get-string-all (current-input-port)))) #\newline)) (λ () (get-string-all (current-input-port)))) #\newline))