2024-03-03 18:13:22 +00:00
|
|
|
(define-module (uniseg eastasian locale)
|
2024-02-29 20:17:43 +00:00
|
|
|
#:use-module (ice-9 regex)
|
2024-03-02 18:59:31 +00:00
|
|
|
#:export (eastasian-locale?))
|
2024-02-29 20:17:43 +00:00
|
|
|
|
|
|
|
(define wide-locales
|
|
|
|
(list
|
|
|
|
"utf-8"
|
|
|
|
"utf8"
|
|
|
|
"jis"
|
|
|
|
"eucjp"
|
|
|
|
"euckr"
|
|
|
|
"euccn"
|
|
|
|
"sjis"
|
|
|
|
"cp932"
|
|
|
|
"cp51932"
|
|
|
|
"cp936"
|
|
|
|
"cp949"
|
|
|
|
"cp950"
|
|
|
|
"big5"
|
|
|
|
"gbk"
|
|
|
|
"gb2312"))
|
|
|
|
|
|
|
|
;; algorithm from:
|
2024-03-03 18:13:22 +00:00
|
|
|
;; https://github.com/mattn/go-uniseg/blob/master/uniseg_posix.go
|
2024-02-29 20:17:43 +00:00
|
|
|
|
|
|
|
;; For extracting the charset part of the locale string (some locales require this)
|
|
|
|
;; Note regex and capture group different as guile does not support 'non-capturing group' syntax
|
|
|
|
(define charset-regexp (make-regexp "^[a-z][a-z][a-z]?(_[A-Z][A-Z])?\\.(.+)"))
|
|
|
|
|
|
|
|
;; POSIX, C, C-asdfs or C.asdfdsf
|
|
|
|
(define c-or-posix-regexp (make-regexp "^(POSIX$|C($|[\\.-]))"))
|
|
|
|
|
|
|
|
(define (get-env-locale)
|
|
|
|
(or (getenv "LC_ALL")
|
|
|
|
(getenv "LC_CTYPE")
|
|
|
|
(getenv "LANG")
|
|
|
|
""))
|
|
|
|
|
2024-03-02 18:59:31 +00:00
|
|
|
(define* (eastasian-locale? #:optional (locale (get-env-locale)))
|
2024-02-29 20:17:43 +00:00
|
|
|
"Check if a given locale (or the currently installed locale from the environment) is considered an east asian locale"
|
|
|
|
(unless (string? locale)
|
|
|
|
(error "`locale' must be a string"))
|
|
|
|
|
|
|
|
(define charset
|
|
|
|
(string-downcase
|
|
|
|
(or
|
|
|
|
;; Separating out locale from charset with our regex
|
|
|
|
(let ((locale-match (regexp-exec charset-regexp locale)))
|
|
|
|
(and
|
|
|
|
(regexp-match? locale-match)
|
|
|
|
(= 3 (match:count locale-match))
|
|
|
|
(match:substring locale-match 2)))
|
|
|
|
locale)))
|
|
|
|
|
|
|
|
(and
|
|
|
|
(not (regexp-exec c-or-posix-regexp locale))
|
|
|
|
(not (string-suffix? "@cjk_narrow" charset))
|
|
|
|
(let ((index-@ (string-index charset #\@)))
|
|
|
|
;; strip @foo from the end of the charset
|
|
|
|
(when index-@
|
2024-03-02 18:59:31 +00:00
|
|
|
(set! charset (substring charset 0 index-@)))
|
|
|
|
|
2024-02-29 20:17:43 +00:00
|
|
|
(and (member charset wide-locales)
|
|
|
|
(or
|
|
|
|
(not (eq? #\u (string-ref charset 0)))
|
|
|
|
(string-prefix? "ja" locale)
|
|
|
|
(string-prefix? "ko" locale)
|
|
|
|
(string-prefix? "zh" locale))))))
|