(define-module (uniseg) #:use-module (srfi srfi-1) #:use-module (ice-9 match) #:use-module (uniseg emoji) #:use-module (uniseg graphemes) #:use-module (uniseg eastasian) #:export (emoji? grapheme-property eastasian-property)) (define (emoji? char) (char-set-contains? char-set:extended-pictographic char)) (define (get-prop sets char) (let ((pair (find (λ (p) (char-set-contains? (second p) char)) sets))) (and pair (first pair)))) (define (grapheme-property char) "Find the Unicode grapheme cluster property, as defined by https://www.unicode.org/reports/tr44/" (or (get-prop grapheme-charsets char) 'other)) (define (eastasian-property char) "Find the Unicode East Asian width property, as defined by https://www.unicode.org/reports/tr11/" (define (<==> lo hi) "Create a range predicate, inclusive between two numbers" (λ (num) (and (>= num lo) (<= num hi)))) ;; The spec requires that any values not in the table be double-width, if ;; they are in certain ranges. So let's take care of that here. ;; These characters are not in the char-sets as they are undesignated. ;; See comments in unicode's EastAsianWidth.txt (define range-unified-ideographs-ext-a? (<==> #x3400 #x4DBF)) (define range-unified-ideographs? (<==> #x4E00 #x9FFF)) (define range-compatibility-ideographs? (<==> #xF900 #xFAFF)) (define range-plane-2? (<==> #x20000 #x2FFFD)) (define range-plane-3? (<==> #x30000 #x3FFFD)) (or (get-prop eastasian-charsets char) (let ((int (char->integer char))) (match int ((or (? range-unified-ideographs-ext-a?) (? range-unified-ideographs?) (? range-compatibility-ideographs?) (? range-plane-2?) (? range-plane-3?)) 'doublewidth) (else 'neutral)))))