guile-uniseg/runewidth/eastasian/locale.scm

(define-module (runewidth eastasian locale)
  #:use-module (ice-9 regex)
  #:export (eastasian-locale?))

(define wide-locales
  (list
	 "utf-8"
	 "utf8"
	 "jis"
	 "eucjp"
	 "euckr"
	 "euccn"
	 "sjis"
	 "cp932"
	 "cp51932"
	 "cp936"
	 "cp949"
	 "cp950"
	 "big5"
	 "gbk"
	 "gb2312"))

;; algorithm from:
;; https://github.com/mattn/go-runewidth/blob/master/runewidth_posix.go

;; For extracting the charset part of the locale string (some locales require this)
;; Note regex and capture group different as guile does not support 'non-capturing group' syntax
(define charset-regexp (make-regexp "^[a-z][a-z][a-z]?(_[A-Z][A-Z])?\\.(.+)"))

;; POSIX, C, C-asdfs or C.asdfdsf
(define c-or-posix-regexp (make-regexp "^(POSIX$|C($|[\\.-]))"))

(define (get-env-locale)
  (or (getenv "LC_ALL")
      (getenv "LC_CTYPE")
      (getenv "LANG")
      ""))

(define* (eastasian-locale? #:optional (locale (get-env-locale)))
  "Check if a given locale (or the currently installed locale from the environment) is considered an east asian locale"
  (unless (string? locale)
    (error "`locale' must be a string"))

  (define charset
    (string-downcase
     (or
      ;; Separating out locale from charset with our regex
      (let ((locale-match (regexp-exec charset-regexp locale)))
        (and
         (regexp-match? locale-match)
         (= 3 (match:count locale-match))
         (match:substring locale-match 2)))
      locale)))

  (and
   (not (regexp-exec c-or-posix-regexp locale))
   (not (string-suffix? "@cjk_narrow" charset))
   (let ((index-@ (string-index charset #\@)))
     ;; strip @foo from the end of the charset
     (when index-@
       (set! charset (substring charset 0 index-@)))

     (and (member charset wide-locales)
          (or
           (not (eq? #\u (string-ref charset 0)))
           (string-prefix? "ja" locale)
           (string-prefix? "ko" locale)
           (string-prefix? "zh" locale))))))