Adding check for east asian charset with tests
This commit is contained in:
parent
a5bdb2e688
commit
eaaceb3c56
5 changed files with 128 additions and 2 deletions
2
hall.scm
2
hall.scm
|
@ -21,7 +21,7 @@
|
|||
(files (libraries
|
||||
((directory "runewidth" ())
|
||||
(scheme-file "runewidth")))
|
||||
(tests ((directory "tests" ())))
|
||||
(tests ((directory "tests" ((scheme-file "test-posix")))))
|
||||
(programs ())
|
||||
(documentation
|
||||
((org-file "README")
|
||||
|
|
35
runewidth/hconfig.scm
Normal file
35
runewidth/hconfig.scm
Normal file
|
@ -0,0 +1,35 @@
|
|||
(define-module
|
||||
(runewidth hconfig)
|
||||
#:use-module
|
||||
(srfi srfi-26)
|
||||
#:export
|
||||
(%version
|
||||
%author
|
||||
%license
|
||||
%copyright
|
||||
%gettext-domain
|
||||
G_
|
||||
N_
|
||||
init-nls
|
||||
init-locale))
|
||||
|
||||
(define %version "0.1")
|
||||
|
||||
(define %author "Vivanne Langdon")
|
||||
|
||||
(define %license 'gpl3+)
|
||||
|
||||
(define %copyright '(2024))
|
||||
|
||||
(define %gettext-domain "guile-runewidth")
|
||||
|
||||
(define G_ identity)
|
||||
|
||||
(define N_ identity)
|
||||
|
||||
(define (init-nls) "Dummy as no NLS is used" #t)
|
||||
|
||||
(define (init-locale)
|
||||
"Dummy as no NLS is used"
|
||||
#t)
|
||||
|
68
runewidth/posix.scm
Normal file
68
runewidth/posix.scm
Normal file
|
@ -0,0 +1,68 @@
|
|||
(define-module (runewidth posix)
|
||||
#:use-module (ice-9 regex)
|
||||
#:export (east-asian?))
|
||||
|
||||
(define wide-locales
|
||||
(list
|
||||
"utf-8"
|
||||
"utf8"
|
||||
"jis"
|
||||
"eucjp"
|
||||
"euckr"
|
||||
"euccn"
|
||||
"sjis"
|
||||
"cp932"
|
||||
"cp51932"
|
||||
"cp936"
|
||||
"cp949"
|
||||
"cp950"
|
||||
"big5"
|
||||
"gbk"
|
||||
"gb2312"))
|
||||
|
||||
;; algorithm from:
|
||||
;; https://github.com/mattn/go-runewidth/blob/master/runewidth_posix.go
|
||||
|
||||
;; For extracting the charset part of the locale string (some locales require this)
|
||||
;; Note regex and capture group different as guile does not support 'non-capturing group' syntax
|
||||
(define charset-regexp (make-regexp "^[a-z][a-z][a-z]?(_[A-Z][A-Z])?\\.(.+)"))
|
||||
|
||||
;; POSIX, C, C-asdfs or C.asdfdsf
|
||||
(define c-or-posix-regexp (make-regexp "^(POSIX$|C($|[\\.-]))"))
|
||||
|
||||
(define (get-env-locale)
|
||||
(or (getenv "LC_ALL")
|
||||
(getenv "LC_CTYPE")
|
||||
(getenv "LANG")
|
||||
""))
|
||||
|
||||
(define* (east-asian? #:optional (locale (get-env-locale)))
|
||||
"Check if a given locale (or the currently installed locale from the environment) is considered an east asian locale"
|
||||
(unless (string? locale)
|
||||
(error "`locale' must be a string"))
|
||||
|
||||
(define charset
|
||||
(string-downcase
|
||||
(or
|
||||
;; Separating out locale from charset with our regex
|
||||
(let ((locale-match (regexp-exec charset-regexp locale)))
|
||||
(and
|
||||
(regexp-match? locale-match)
|
||||
(= 3 (match:count locale-match))
|
||||
(match:substring locale-match 2)))
|
||||
locale)))
|
||||
|
||||
(and
|
||||
(not (regexp-exec c-or-posix-regexp locale))
|
||||
(not (string-suffix? "@cjk_narrow" charset))
|
||||
(let ((index-@ (string-index charset #\@)))
|
||||
;; strip @foo from the end of the charset
|
||||
(when index-@
|
||||
(set! charset (substring/shared charset 0 index-@)))
|
||||
|
||||
(and (member charset wide-locales)
|
||||
(or
|
||||
(not (eq? #\u (string-ref charset 0)))
|
||||
(string-prefix? "ja" locale)
|
||||
(string-prefix? "ko" locale)
|
||||
(string-prefix? "zh" locale))))))
|
23
tests/test-posix.scm
Normal file
23
tests/test-posix.scm
Normal file
|
@ -0,0 +1,23 @@
|
|||
(define-module (tests test-posix)
|
||||
#:use-module (runewidth posix)
|
||||
#:use-module (srfi srfi-64))
|
||||
|
||||
(test-begin "test-posix")
|
||||
|
||||
(define (test-east expected charset)
|
||||
(test-equal expected (east-asian? charset)))
|
||||
|
||||
(define data
|
||||
'((#f "foo@cjk_narrow")
|
||||
(#f "foo@cjk")
|
||||
(#f "utf-8@cjk")
|
||||
(#f "C")
|
||||
(#f "POSIX")
|
||||
(#f "C.UTF-8")
|
||||
(#t "ja_JP.UTF-8")
|
||||
(#t "ja_JP.CP932")
|
||||
(#f "en_US.UTF-8")))
|
||||
|
||||
(for-each (λ (p) (apply test-east p)) data)
|
||||
|
||||
(test-end "test-posix")
|
Loading…
Reference in a new issue