Cleanups and reducing duplicates
This commit is contained in:
parent
92dc5a8908
commit
a03c6d2348
2
guix.scm
2
guix.scm
|
@ -80,7 +80,7 @@
|
|||
"GUILE_LOAD_COMPILED_PATH"
|
||||
(compiled-dir out version)
|
||||
(compiled-dir "" version))))
|
||||
,''("generate-east-asian"
|
||||
,''("generate-eastasian"
|
||||
"generate-emoji"
|
||||
"generate-graphemes"))
|
||||
#t))))))))
|
||||
|
|
2
hall.scm
2
hall.scm
|
@ -36,7 +36,7 @@
|
|||
(programs
|
||||
((directory
|
||||
"scripts"
|
||||
((in-file "generate-east-asian")
|
||||
((in-file "generate-eastasian")
|
||||
(in-file "generate-emoji")
|
||||
(in-file "generate-graphemes")))))
|
||||
(documentation
|
||||
|
|
|
@ -16,7 +16,7 @@
|
|||
|
||||
(define stdout (current-output-port))
|
||||
|
||||
(define east-asian-url
|
||||
(define url
|
||||
"https://unicode.org/Public/13.0.0/ucd/EastAsianWidth.txt")
|
||||
|
||||
(define-peg-pattern @ea-width-prop all
|
||||
|
@ -30,7 +30,7 @@
|
|||
|
||||
(define eastasian-ht (make-hash-table 6))
|
||||
|
||||
(define (process-east-asian-line line)
|
||||
(define (process-line line)
|
||||
(define (string->property str comment)
|
||||
(if (string-contains comment "COMBINING")
|
||||
'combining
|
||||
|
@ -90,22 +90,15 @@
|
|||
(zip ea-sets ea-symbol-names))
|
||||
|
||||
|
||||
(define line-func
|
||||
(if (= 2 (length (command-line)))
|
||||
(λ ()
|
||||
(file-to-lines (last (command-line)) stdout))
|
||||
(λ ()
|
||||
(wget-to-lines east-asian-url stdout))))
|
||||
|
||||
(define file "uniseg/eastasian.scm")
|
||||
|
||||
(format stdout "Writing to ~a...\n" file)
|
||||
|
||||
(with-output-to-file file
|
||||
(λ ()
|
||||
(format #t ";; Code generated by ~a. DO NOT EDIT\n\n" (first (command-line)))
|
||||
(format #t ";; Code generated by ~a. DO NOT EDIT\n\n" (basename (current-filename)))
|
||||
|
||||
(for-each process-east-asian-line (line-func))
|
||||
(for-each process-line (cmdline-wget-or-file url stdout))
|
||||
|
||||
(pretty-print
|
||||
`(define-module (uniseg eastasian)
|
||||
|
@ -152,4 +145,3 @@
|
|||
(display "Code generation complete.\n" stdout)))
|
||||
|
||||
(format stdout "Written to ~a.\n" file)
|
||||
|
|
@ -15,7 +15,7 @@
|
|||
|
||||
(define stdout (current-output-port))
|
||||
|
||||
(define emoji-url
|
||||
(define url
|
||||
"https://unicode.org/Public/13.0.0/ucd/emoji/emoji-data.txt")
|
||||
|
||||
(define-peg-pattern @emoji-category all
|
||||
|
@ -49,7 +49,7 @@
|
|||
(define emoji-sets-and-symbols
|
||||
(zip emoji-sets emoji-symbol-names))
|
||||
|
||||
(define (process-emoji-line line)
|
||||
(define (process-line line)
|
||||
(define (string->category str)
|
||||
(match str
|
||||
("Emoji" 'emoji)
|
||||
|
@ -87,22 +87,15 @@
|
|||
(cons-hash-list! emoji-ht category f l)))
|
||||
#:unwind? #t)))))
|
||||
|
||||
(define line-func
|
||||
(if (= 2 (length (command-line)))
|
||||
(λ ()
|
||||
(file-to-lines (last (command-line)) stdout))
|
||||
(λ ()
|
||||
(wget-to-lines emoji-url stdout))))
|
||||
|
||||
(define file "uniseg/emoji.scm")
|
||||
|
||||
(format stdout "Writing to ~a...\n" file)
|
||||
|
||||
(with-output-to-file file
|
||||
(λ ()
|
||||
(format #t ";; Code generated by ~a. DO NOT EDIT\n\n" (first (command-line)))
|
||||
(format #t ";; Code generated by ~a. DO NOT EDIT\n\n" (basename (current-filename)))
|
||||
|
||||
(for-each process-emoji-line (line-func))
|
||||
(for-each process-line (cmdline-wget-or-file url stdout))
|
||||
|
||||
(pretty-print
|
||||
`(define-module (uniseg emoji)
|
||||
|
|
|
@ -16,7 +16,7 @@
|
|||
|
||||
(define stdout (current-output-port))
|
||||
|
||||
(define grapheme-url
|
||||
(define url
|
||||
"https://www.unicode.org/Public/15.0.0/ucd/auxiliary/GraphemeBreakProperty.txt")
|
||||
|
||||
(define-peg-pattern @grapheme-category all
|
||||
|
@ -56,7 +56,7 @@
|
|||
(define grapheme-sets-and-symbols
|
||||
(zip grapheme-sets grapheme-symbol-names))
|
||||
|
||||
(define (process-grapheme-line line)
|
||||
(define (process-line line)
|
||||
(define (string->category str)
|
||||
(match str
|
||||
("L" 'hangul-syllable-l)
|
||||
|
@ -101,12 +101,6 @@
|
|||
(cons-hash-list! grapheme-ht category f l)))
|
||||
#:unwind? #t)))))
|
||||
|
||||
(define line-func
|
||||
(if (= 2 (length (command-line)))
|
||||
(λ ()
|
||||
(file-to-lines (last (command-line)) stdout))
|
||||
(λ ()
|
||||
(wget-to-lines grapheme-url stdout))))
|
||||
|
||||
(define file "uniseg/graphemes.scm")
|
||||
|
||||
|
@ -114,9 +108,9 @@
|
|||
|
||||
(with-output-to-file file
|
||||
(λ ()
|
||||
(format #t ";; Code generated by ~a. DO NOT EDIT\n\n" (first (command-line)))
|
||||
(format #t ";; Code generated by ~a. DO NOT EDIT\n\n" (basename (current-filename)))
|
||||
|
||||
(for-each process-grapheme-line (line-func))
|
||||
(for-each process-line (cmdline-wget-or-file url stdout))
|
||||
|
||||
(pretty-print
|
||||
`(define-module (uniseg graphemes)
|
||||
|
|
5030
uniseg/eastasian.scm
5030
uniseg/eastasian.scm
File diff suppressed because it is too large
Load Diff
2461
uniseg/emoji.scm
2461
uniseg/emoji.scm
File diff suppressed because it is too large
Load Diff
1192
uniseg/graphemes.scm
1192
uniseg/graphemes.scm
File diff suppressed because it is too large
Load Diff
|
@ -20,8 +20,7 @@
|
|||
hex-string->integer
|
||||
format-exception-msg
|
||||
in-surrogate-range
|
||||
wget-to-lines
|
||||
file-to-lines
|
||||
cmdline-wget-or-file
|
||||
ranges->charset!))
|
||||
|
||||
;;
|
||||
|
@ -84,16 +83,22 @@
|
|||
(and (>= num #xd800)
|
||||
(<= num #xdfff)))
|
||||
|
||||
(define* (wget-to-lines url #:optional (port #f))
|
||||
(format port "Downloading from ~a..." url)
|
||||
(define (cmdline-wget-or-file url log-port)
|
||||
(let ((cmdline (command-line)))
|
||||
(if (= 2 (length cmdline))
|
||||
(file-to-lines (last cmdline) log-port)
|
||||
(wget-to-lines url log-port))))
|
||||
|
||||
(define (wget-to-lines url log-port)
|
||||
(format log-port "Downloading from ~a..." url)
|
||||
|
||||
(let* ((response body (http-get (string->uri url)))
|
||||
(lines (string-split body #\newline)))
|
||||
(format port " done.\n")
|
||||
(format log-port " done.\n")
|
||||
lines))
|
||||
|
||||
(define* (file-to-lines path #:optional (port #f))
|
||||
(format port "Loading from local file ~a\n" path)
|
||||
(define (file-to-lines path log-port)
|
||||
(format log-port "Loading from local file ~a\n" path)
|
||||
(string-split
|
||||
(with-input-from-file path
|
||||
(λ () (get-string-all (current-input-port)))) #\newline))
|
||||
|
|
Loading…
Reference in New Issue