guile-uniseg/uniseg.scm

61 lines
1.8 KiB
Scheme

(define-module (uniseg)
#:use-module (srfi srfi-1)
#:use-module (ice-9 match)
#:use-module (uniseg emoji)
#:use-module (uniseg graphemes)
#:use-module (uniseg eastasian)
#:export (emoji?
grapheme-property
eastasian-property))
(define (emoji? char)
(char-set-contains? char-set:extended-pictographic char))
(define (get-prop sets char)
(let ((pair
(find
(λ (p) (char-set-contains? (second p) char))
sets)))
(and pair
(first pair))))
(define (grapheme-property char)
"Find the unicode grapheme cluster property, as defined by https://www.unicode.org/reports/tr44/"
(or (get-prop grapheme-charsets char)
'other))
(define (eastasian-property char)
(define (<==> lo hi)
"Create a range predicate, inclusive between two numbers"
(λ (num)
(and (>= num lo)
(<= num hi))))
(define range-unified-ideographs-ext-a?
(<==> #x3400 #x4DBF))
(define range-unified-ideographs?
(<==> #x4E00 #x9FFF))
(define range-compatibility-ideographs?
(<==> #xF900 #xFAFF))
(define range-plane-2?
(<==> #x20000 #x2FFFD))
(define range-plane-3?
(<==> #x30000 #x3FFFD))
;; The spec defines any values not in the table be wide, if
;; they are in certain ranges. So let's take care of that here.
;; These characters are not in the char-sets as they are undesignated.
;; See comments in unicode's EastAsianWidth.txt
(or (get-prop eastasian-charsets char)
(let ((int (char->integer char)))
(match int
((or (? range-unified-ideographs-ext-a?)
(? range-unified-ideographs?)
(? range-compatibility-ideographs?)
(? range-plane-2?)
(? range-plane-3?))
'doublewidth)
(else
'neutral)))))