guile-uniseg/uniseg/eastasian/locale.scm

69 lines
1.9 KiB
Scheme

(define-module (uniseg eastasian locale)
#:use-module (ice-9 regex)
#:export (eastasian-locale?))
(define wide-locales
(list
"utf-8"
"utf8"
"jis"
"eucjp"
"euckr"
"euccn"
"sjis"
"cp932"
"cp51932"
"cp936"
"cp949"
"cp950"
"big5"
"gbk"
"gb2312"))
;; algorithm from:
;; https://github.com/mattn/go-uniseg/blob/master/uniseg_posix.go
;; For extracting the charset part of the locale string (some locales require this)
;; Note regex and capture group different as guile does not support 'non-capturing group' syntax
(define charset-regexp (make-regexp "^[a-z][a-z][a-z]?(_[A-Z][A-Z])?\\.(.+)"))
;; POSIX, C, C-asdfs or C.asdfdsf
(define c-or-posix-regexp (make-regexp "^(POSIX$|C($|[\\.-]))"))
(define (get-env-locale)
(or (getenv "LC_ALL")
(getenv "LC_CTYPE")
(getenv "LANG")
""))
(define* (eastasian-locale? #:optional (locale (get-env-locale)))
"Check if a given locale (or the currently installed locale from the environment) is considered an east asian locale"
(unless (string? locale)
(error "`locale' must be a string"))
(define charset
(string-downcase
(or
;; Separating out locale from charset with our regex
(let ((locale-match (regexp-exec charset-regexp locale)))
(and
(regexp-match? locale-match)
(= 3 (match:count locale-match))
(match:substring locale-match 2)))
locale)))
(and
(not (regexp-exec c-or-posix-regexp locale))
(not (string-suffix? "@cjk_narrow" charset))
(let ((index-@ (string-index charset #\@)))
;; strip @foo from the end of the charset
(when index-@
(set! charset (substring charset 0 index-@)))
(and (member charset wide-locales)
(or
(not (eq? #\u (string-ref charset 0)))
(string-prefix? "ja" locale)
(string-prefix? "ko" locale)
(string-prefix? "zh" locale))))))