diff --git a/scripts/generate.in b/scripts/generate.in
new file mode 100644
index 0000000..9b57a99
--- /dev/null
+++ b/scripts/generate.in
@@ -0,0 +1,115 @@
+#!@GUILE@ --no-auto-compile
+-*- scheme -*-
+!#
+
+;;; Generate ../runewidth/table.scm from the Unicode 13.0.0 data files.
+;;;
+;;; Downloads EastAsianWidth.txt and emoji-data.txt, matches every line
+;;; against a PEG grammar and emits output into the table file.  Paths
+;;; are relative to the current working directory.
+
+(use-modules
+ (ice-9 pretty-print)
+ (ice-9 peg)
+ (rnrs bytevectors)
+ (web client)
+ (web response)
+ (web uri)
+ (web request))
+
+;; Captured before `with-output-to-file' rebinds the current output port,
+;; so progress messages can still reach the terminal.
+(define stdout (current-output-port))
+
+(define east-asian-url
+  "https://unicode.org/Public/13.0.0/ucd/EastAsianWidth.txt")
+
+(define emoji-url
+  "https://unicode.org/Public/13.0.0/ucd/emoji/emoji-data.txt")
+
+;;; Grammar shared by both data files.
+
+(define-peg-pattern @hex body (peg "[a-fA-F0-9]"))
+
+;; One or more hex digits; `+' rather than `*' so an empty string is not
+;; accepted as a code point.
+(define-peg-pattern @codepoint all
+  (+ @hex))
+
+;; Optional horizontal whitespace around fields.
+(define-peg-pattern @ws none
+  (* (or " " "\t")))
+
+;; Either "XXXX..YYYY" or a single "XXXX".
+(define-peg-pattern @codepoint-range all
+  (or
+   (and @codepoint (ignore "..") @codepoint)
+   @codepoint))
+
+;; Everything from "#" to the end of the line.
+(define-peg-pattern @comment none
+  (and "#" (* peg-any)))
+
+;;; EastAsianWidth.txt
+
+;; PEG choice is ordered, so "Na" has to be tried before "N"; otherwise
+;; "Na" is matched as "N".
+(define-peg-pattern @ea-width-prop all
+  (or "Na" "A" "F" "H" "N" "W"))
+
+(define-peg-pattern @ea-datum body
+  (and @codepoint-range @ws (ignore ";") @ws @ea-width-prop))
+
+(define-peg-pattern @ea-line body
+  (and (? @ea-datum) @ws (? @comment)))
+
+;;; emoji-data.txt
+
+;; Property name such as "Emoji" or "Emoji_Presentation".
+(define-peg-pattern @emoji-prop all
+  (+ (or (range #\a #\z) (range #\A #\Z) "_")))
+
+(define-peg-pattern @emoji-datum body
+  (and @codepoint-range @ws (ignore ";") @ws @emoji-prop))
+
+(define-peg-pattern @emoji-line body
+  (and (? @emoji-datum) @ws (? @comment)))
+
+
+;; Match LINE against @ea-line and print the parse tree with `pk' unless
+;; the match failed or produced an empty tree.
+(define (process-east-asian-line line)
+  (let ((tree (peg:tree (match-pattern @ea-line line))))
+    (unless (or (not tree) (null? tree))
+      (pk tree))))
+
+;; Same as `process-east-asian-line', for emoji-data.txt lines.
+(define (process-emoji-line line)
+  (let ((tree (peg:tree (match-pattern @emoji-line line))))
+    (unless (or (not tree) (null? tree))
+      (pk tree))))
+
+
+;; Fetch URL and return its body as a list of lines.  `http-get' returns
+;; two values (response and body); the body may arrive as a bytevector
+;; when it is not decoded as text.
+(define (wget-to-lines url)
+  (call-with-values
+      (lambda () (http-get (string->uri url)))
+    (lambda (response body)
+      (unless (= 200 (response-code response))
+        (error "download failed" url (response-code response)))
+      (string-split (if (bytevector? body) (utf8->string body) body)
+                    #\newline))))
+
+(with-output-to-file "../runewidth/table.scm"
+  (lambda ()
+    (display ";; Code generated by script/generate. DO NOT EDIT\n\n")
+    (display "(define-module (runewidth table))\n\n")
+
+    ;; Progress goes to the saved `stdout', not into the generated file.
+    (format stdout "Downloading and processing from ~a...~%" east-asian-url)
+    (for-each process-east-asian-line (wget-to-lines east-asian-url))
+
+    (format stdout "Downloading and processing from ~a...~%" emoji-url)
+    (for-each process-emoji-line (wget-to-lines emoji-url))))