2024-03-03 18:13:22 +00:00
|
|
|
(define-module (uniseg)
|
|
|
|
#:use-module (srfi srfi-1)
|
|
|
|
#:use-module (ice-9 match)
|
|
|
|
#:use-module (uniseg emoji)
|
|
|
|
#:use-module (uniseg graphemes)
|
|
|
|
#:use-module (uniseg eastasian)
|
|
|
|
#:export (emoji?
|
|
|
|
grapheme-property
|
|
|
|
eastasian-property))
|
|
|
|
|
|
|
|
(define (emoji? char)
|
|
|
|
(char-set-contains? char-set:extended-pictographic char))
|
|
|
|
|
|
|
|
(define (get-prop sets char)
|
|
|
|
(let ((pair
|
|
|
|
(find
|
|
|
|
(λ (p) (char-set-contains? (second p) char))
|
|
|
|
sets)))
|
|
|
|
(and pair
|
|
|
|
(first pair))))
|
|
|
|
|
|
|
|
(define (grapheme-property char)
|
2024-03-03 18:27:11 +00:00
|
|
|
"Find the Unicode grapheme cluster property, as defined by https://www.unicode.org/reports/tr44/"
|
2024-03-03 18:13:22 +00:00
|
|
|
(or (get-prop grapheme-charsets char)
|
|
|
|
'other))
|
|
|
|
|
|
|
|
(define (eastasian-property char)
|
2024-03-03 18:27:11 +00:00
|
|
|
"Find the Unicode East Asian width property, as defined by https://www.unicode.org/reports/tr11/"
|
2024-03-03 18:13:22 +00:00
|
|
|
(define (<==> lo hi)
|
|
|
|
"Create a range predicate, inclusive between two numbers"
|
|
|
|
(λ (num)
|
|
|
|
(and (>= num lo)
|
|
|
|
(<= num hi))))
|
|
|
|
|
2024-03-03 18:27:11 +00:00
|
|
|
;; The spec requires that any values not in the table be double-width, if
|
|
|
|
;; they are in certain ranges. So let's take care of that here.
|
|
|
|
;; These characters are not in the char-sets as they are undesignated.
|
|
|
|
;; See comments in unicode's EastAsianWidth.txt
|
2024-03-03 18:13:22 +00:00
|
|
|
(define range-unified-ideographs-ext-a?
|
|
|
|
(<==> #x3400 #x4DBF))
|
|
|
|
(define range-unified-ideographs?
|
|
|
|
(<==> #x4E00 #x9FFF))
|
|
|
|
(define range-compatibility-ideographs?
|
|
|
|
(<==> #xF900 #xFAFF))
|
|
|
|
(define range-plane-2?
|
|
|
|
(<==> #x20000 #x2FFFD))
|
|
|
|
(define range-plane-3?
|
|
|
|
(<==> #x30000 #x3FFFD))
|
|
|
|
|
|
|
|
(or (get-prop eastasian-charsets char)
|
|
|
|
(let ((int (char->integer char)))
|
|
|
|
(match int
|
|
|
|
((or (? range-unified-ideographs-ext-a?)
|
|
|
|
(? range-unified-ideographs?)
|
|
|
|
(? range-compatibility-ideographs?)
|
|
|
|
(? range-plane-2?)
|
|
|
|
(? range-plane-3?))
|
|
|
|
'doublewidth)
|
|
|
|
(else
|
|
|
|
'neutral)))))
|