117 lines
3.8 KiB
Scheme
117 lines
3.8 KiB
Scheme
(define-module (runewidth graphemes stream)
|
|
#:use-module (ice-9 match)
|
|
#:use-module (srfi srfi-41)
|
|
#:use-module (srfi srfi-9 gnu)
|
|
#:export (make-grapheme
|
|
grapheme?
|
|
grapheme-glyphs
|
|
grapheme-width
|
|
grapheme-sentence-end?
|
|
grapheme-word-end?
|
|
input->grapheme-stream))
|
|
|
|
(define-immutable-record-type <grapheme>
|
|
(make-grapheme glyphs width sentence-end? word-end?)
|
|
grapheme?
|
|
(glyphs grapheme-glyphs)
|
|
(width grapheme-width)
|
|
(sentence-end? grapheme-sentence-end?)
|
|
(word-end? grapheme-word-end?))
|
|
|
|
(define-stream (input->grapheme-stream port)
|
|
(unless port
|
|
(set! port (current-input-port)))
|
|
|
|
;; The first is what state we are in, and the next is the grapheme
|
|
;; property of the current character.
|
|
;; Port of https://github.com/rivo/uniseg/blob/master/graphemerules.go
|
|
(define (state-machine cur-state cur-prop)
|
|
(match (list cur-state cur-prop)
|
|
;; Grapheme boundary #3
|
|
(('carriage-return 'line-feed)
|
|
(values 'control+line-feed 'no-boundary))
|
|
|
|
;; Grapheme boundary #4
|
|
((or ('carriage-return _)
|
|
('control+line-feed _))
|
|
(values 'any 'boundary))
|
|
|
|
;; Grapheme boundary #5
|
|
((_ 'carriage-return)
|
|
(values 'carriage-return 'boundary))
|
|
((or (_ 'line-feed)
|
|
(_ 'control))
|
|
(values 'control+line-feed 'boundary))
|
|
|
|
;; Grapheme boundary #6
|
|
(('hangul-syllable-l 'hangul-syllable-l)
|
|
(values 'hangul-syllable-l 'no-boundary))
|
|
((or ('hangul-syllable-l 'hangul-syllable-v)
|
|
('hangul-syllable-l 'hangul-syllable-lv))
|
|
(values 'hangul-syllable-lv 'no-boundary))
|
|
(('hangul-syllable-l 'hangul-syllable-lvt)
|
|
(values 'hangul-syllable-lvt 'no-boundary))
|
|
((_ 'hangul-syllable-l)
|
|
(values 'hangul-syllable-l 'boundary))
|
|
|
|
;; Grapheme boundary #7
|
|
(('hangul-syllable-lv 'hangul-syllable-v)
|
|
(values 'hangul-syllable-lv 'no-boundary))
|
|
(('hangul-syllable-lv 'hangul-syllable-t)
|
|
(values 'hangul-syllable-lvt 'no-boundary))
|
|
((or (_ 'hangul-syllable-lv)
|
|
(_ 'hangul-syllable-v))
|
|
(values 'hangul-syllable-lv 'boundary))
|
|
|
|
;; Grapheme boundary #8
|
|
((or (_ 'hangul-syllable-lvt)
|
|
(_ 'hangul-syllable-t))
|
|
(values 'hangul-syllable-lvt 'boundary))
|
|
(('hangul-syllable-lvt 'hangul-syllable-t)
|
|
(values 'hangul-syllable-lvt 'no-boundary))
|
|
|
|
;; Grapheme boundary #9
|
|
((or (_ 'extend)
|
|
(_ 'zero-width-joiner))
|
|
(values 'any 'no-boundary))
|
|
|
|
;; Grapheme boundary #9a
|
|
((_ 'spacing-mark)
|
|
(values 'any 'no-boundary))
|
|
|
|
;; Grapheme boundary #9b
|
|
(('prepend _)
|
|
(values 'any 'no-boundary))
|
|
((_ 'prepend)
|
|
(values 'prepend 'boundary))
|
|
|
|
;; Grapheme boundary #11 (emoji!)
|
|
(('extended-pictographic 'extend)
|
|
(values 'extended-pictographic 'no-boundary))
|
|
(('extended-pictographic 'zero-width-joiner)
|
|
(values 'extended-pictographic+zero-width-joiner 'no-boundary))
|
|
(('extended-pictographic+zero-width-joiner 'extended-pictographic)
|
|
(values 'extended-pictographic 'no-boundary))
|
|
((_ 'extended-pictographic)
|
|
(values 'extended-pictographic 'boundary))
|
|
|
|
|
|
;; Grapheme boundaries #12 and #13
|
|
(('regional-indicator-odd 'regional-indicator)
|
|
(values 'regioinal-indicator-even 'no-boundary))
|
|
(('regional-indicator-even 'regional-indicator)
|
|
(values 'regional-indicator-odd 'boundary))
|
|
((_ 'regional-indicator)
|
|
(values 'regional-indicator-odd 'boundary))
|
|
|
|
(else (values 'other 'other))))
|
|
|
|
(define (transition-state state char)
|
|
"Given the current state and the next char, run a state transition"
|
|
|
|
|
|
)
|
|
|
|
(define grapheme (make-grapheme glyphs width sentence-end? word-end?))
|
|
(stream-cons grapheme (input->grapheme-stream port)))
|