first pass untested state machine

This commit is contained in:
Vivianne 2024-03-03 14:20:02 -05:00
parent 47146c887f
commit 1692a9fc05

View file

@ -1,8 +1,13 @@
(define-module (runewidth graphemes stream) (define-module (runewidth graphemes stream)
#:use-module (ice-9 match)
#:use-module (srfi srfi-41) #:use-module (srfi srfi-41)
#:use-module (srfi srfi-9 gnu) #:use-module (srfi srfi-9 gnu)
#:export (make-grapheme #:export (make-grapheme
grapheme?
grapheme-glyphs
grapheme-width
grapheme-sentence-end?
grapheme-word-end?
input->grapheme-stream)) input->grapheme-stream))
(define-immutable-record-type <grapheme> (define-immutable-record-type <grapheme>
@ -17,6 +22,95 @@
(unless port (unless port
(set! port (current-input-port))) (set! port (current-input-port)))
(define gr (make-grapheme glyphs width sentence-end? word-end?)) ;; The first is what state we are in, and the next is the grapheme
;; property of the current character.
;; Port of https://github.com/rivo/uniseg/blob/master/graphemerules.go
(define (state-machine cur-state cur-prop)
(match (list cur-state cur-prop)
;; Grapheme boundary #3
(('carriage-return 'line-feed)
(values 'control+line-feed 'no-boundary))
(stream-cons c (input->grapheme-stream port))) ;; Grapheme boundary #4
((or ('carriage-return _)
('control+line-feed _))
(values 'any 'boundary))
;; Grapheme boundary #5
((_ 'carriage-return)
(values 'carriage-return 'boundary))
((or (_ 'line-feed)
(_ 'control))
(values 'control+line-feed 'boundary))
;; Grapheme boundary #6
(('hangul-syllable-l 'hangul-syllable-l)
(values 'hangul-syllable-l 'no-boundary))
((or ('hangul-syllable-l 'hangul-syllable-v)
('hangul-syllable-l 'hangul-syllable-lv))
(values 'hangul-syllable-lv 'no-boundary))
(('hangul-syllable-l 'hangul-syllable-lvt)
(values 'hangul-syllable-lvt 'no-boundary))
((_ 'hangul-syllable-l)
(values 'hangul-syllable-l 'boundary))
;; Grapheme boundary #7
(('hangul-syllable-lv 'hangul-syllable-v)
(values 'hangul-syllable-lv 'no-boundary))
(('hangul-syllable-lv 'hangul-syllable-t)
(values 'hangul-syllable-lvt 'no-boundary))
((or (_ 'hangul-syllable-lv)
(_ 'hangul-syllable-v))
(values 'hangul-syllable-lv 'boundary))
;; Grapheme boundary #8
((or (_ 'hangul-syllable-lvt)
(_ 'hangul-syllable-t))
(values 'hangul-syllable-lvt 'boundary))
(('hangul-syllable-lvt 'hangul-syllable-t)
(values 'hangul-syllable-lvt 'no-boundary))
;; Grapheme boundary #9
((or (_ 'extend)
(_ 'zero-width-joiner))
(values 'any 'no-boundary))
;; Grapheme boundary #9a
((_ 'spacing-mark)
(values 'any 'no-boundary))
;; Grapheme boundary #9b
(('prepend _)
(values 'any 'no-boundary))
((_ 'prepend)
(values 'prepend 'boundary))
;; Grapheme boundary #11 (emoji!)
(('extended-pictographic 'extend)
(values 'extended-pictographic 'no-boundary))
(('extended-pictographic 'zero-width-joiner)
(values 'extended-pictographic+zero-width-joiner 'no-boundary))
(('extended-pictographic+zero-width-joiner 'extended-pictographic)
(values 'extended-pictographic 'no-boundary))
((_ 'extended-pictographic)
(values 'extended-pictographic 'boundary))
;; Grapheme boundaries #12 and #13
(('regional-indicator-odd 'regional-indicator)
(values 'regioinal-indicator-even 'no-boundary))
(('regional-indicator-even 'regional-indicator)
(values 'regional-indicator-odd 'boundary))
((_ 'regional-indicator)
(values 'regional-indicator-odd 'boundary))
(else (values 'other 'other))))
(define (transition-state state char)
"Given the current state and the next char, run a state transition"
)
(define grapheme (make-grapheme glyphs width sentence-end? word-end?))
(stream-cons grapheme (input->grapheme-stream port)))