(define-module (runewidth eastasian locale) #:use-module (ice-9 regex) #:export (eastasian-locale?)) (define wide-locales (list "utf-8" "utf8" "jis" "eucjp" "euckr" "euccn" "sjis" "cp932" "cp51932" "cp936" "cp949" "cp950" "big5" "gbk" "gb2312")) ;; algorithm from: ;; https://github.com/mattn/go-runewidth/blob/master/runewidth_posix.go ;; For extracting the charset part of the locale string (some locales require this) ;; Note regex and capture group different as guile does not support 'non-capturing group' syntax (define charset-regexp (make-regexp "^[a-z][a-z][a-z]?(_[A-Z][A-Z])?\\.(.+)")) ;; POSIX, C, C-asdfs or C.asdfdsf (define c-or-posix-regexp (make-regexp "^(POSIX$|C($|[\\.-]))")) (define (get-env-locale) (or (getenv "LC_ALL") (getenv "LC_CTYPE") (getenv "LANG") "")) (define* (eastasian-locale? #:optional (locale (get-env-locale))) "Check if a given locale (or the currently installed locale from the environment) is considered an east asian locale" (unless (string? locale) (error "`locale' must be a string")) (define charset (string-downcase (or ;; Separating out locale from charset with our regex (let ((locale-match (regexp-exec charset-regexp locale))) (and (regexp-match? locale-match) (= 3 (match:count locale-match)) (match:substring locale-match 2))) locale))) (and (not (regexp-exec c-or-posix-regexp locale)) (not (string-suffix? "@cjk_narrow" charset)) (let ((index-@ (string-index charset #\@))) ;; strip @foo from the end of the charset (when index-@ (set! charset (substring charset 0 index-@))) (and (member charset wide-locales) (or (not (eq? #\u (string-ref charset 0))) (string-prefix? "ja" locale) (string-prefix? "ko" locale) (string-prefix? "zh" locale))))))