first pass untested state machine
This commit is contained in:
parent
47146c887f
commit
1692a9fc05
1 changed files with 97 additions and 3 deletions
|
@ -1,8 +1,13 @@
|
|||
(define-module (runewidth graphemes stream)
|
||||
#:use-module (ice-9 match)
|
||||
#:use-module (srfi srfi-41)
|
||||
#:use-module (srfi srfi-9 gnu)
|
||||
#:export (make-grapheme
|
||||
|
||||
grapheme?
|
||||
grapheme-glyphs
|
||||
grapheme-width
|
||||
grapheme-sentence-end?
|
||||
grapheme-word-end?
|
||||
input->grapheme-stream))
|
||||
|
||||
(define-immutable-record-type <grapheme>
|
||||
|
@ -17,6 +22,95 @@
|
|||
(unless port
|
||||
(set! port (current-input-port)))
|
||||
|
||||
(define gr (make-grapheme glyphs width sentence-end? word-end?))
|
||||
;; The first is what state we are in, and the next is the grapheme
|
||||
;; property of the current character.
|
||||
;; Port of https://github.com/rivo/uniseg/blob/master/graphemerules.go
|
||||
(define (state-machine cur-state cur-prop)
|
||||
(match (list cur-state cur-prop)
|
||||
;; Grapheme boundary #3
|
||||
(('carriage-return 'line-feed)
|
||||
(values 'control+line-feed 'no-boundary))
|
||||
|
||||
(stream-cons c (input->grapheme-stream port)))
|
||||
;; Grapheme boundary #4
|
||||
((or ('carriage-return _)
|
||||
('control+line-feed _))
|
||||
(values 'any 'boundary))
|
||||
|
||||
;; Grapheme boundary #5
|
||||
((_ 'carriage-return)
|
||||
(values 'carriage-return 'boundary))
|
||||
((or (_ 'line-feed)
|
||||
(_ 'control))
|
||||
(values 'control+line-feed 'boundary))
|
||||
|
||||
;; Grapheme boundary #6
|
||||
(('hangul-syllable-l 'hangul-syllable-l)
|
||||
(values 'hangul-syllable-l 'no-boundary))
|
||||
((or ('hangul-syllable-l 'hangul-syllable-v)
|
||||
('hangul-syllable-l 'hangul-syllable-lv))
|
||||
(values 'hangul-syllable-lv 'no-boundary))
|
||||
(('hangul-syllable-l 'hangul-syllable-lvt)
|
||||
(values 'hangul-syllable-lvt 'no-boundary))
|
||||
((_ 'hangul-syllable-l)
|
||||
(values 'hangul-syllable-l 'boundary))
|
||||
|
||||
;; Grapheme boundary #7
|
||||
(('hangul-syllable-lv 'hangul-syllable-v)
|
||||
(values 'hangul-syllable-lv 'no-boundary))
|
||||
(('hangul-syllable-lv 'hangul-syllable-t)
|
||||
(values 'hangul-syllable-lvt 'no-boundary))
|
||||
((or (_ 'hangul-syllable-lv)
|
||||
(_ 'hangul-syllable-v))
|
||||
(values 'hangul-syllable-lv 'boundary))
|
||||
|
||||
;; Grapheme boundary #8
|
||||
((or (_ 'hangul-syllable-lvt)
|
||||
(_ 'hangul-syllable-t))
|
||||
(values 'hangul-syllable-lvt 'boundary))
|
||||
(('hangul-syllable-lvt 'hangul-syllable-t)
|
||||
(values 'hangul-syllable-lvt 'no-boundary))
|
||||
|
||||
;; Grapheme boundary #9
|
||||
((or (_ 'extend)
|
||||
(_ 'zero-width-joiner))
|
||||
(values 'any 'no-boundary))
|
||||
|
||||
;; Grapheme boundary #9a
|
||||
((_ 'spacing-mark)
|
||||
(values 'any 'no-boundary))
|
||||
|
||||
;; Grapheme boundary #9b
|
||||
(('prepend _)
|
||||
(values 'any 'no-boundary))
|
||||
((_ 'prepend)
|
||||
(values 'prepend 'boundary))
|
||||
|
||||
;; Grapheme boundary #11 (emoji!)
|
||||
(('extended-pictographic 'extend)
|
||||
(values 'extended-pictographic 'no-boundary))
|
||||
(('extended-pictographic 'zero-width-joiner)
|
||||
(values 'extended-pictographic+zero-width-joiner 'no-boundary))
|
||||
(('extended-pictographic+zero-width-joiner 'extended-pictographic)
|
||||
(values 'extended-pictographic 'no-boundary))
|
||||
((_ 'extended-pictographic)
|
||||
(values 'extended-pictographic 'boundary))
|
||||
|
||||
|
||||
;; Grapheme boundaries #12 and #13
|
||||
(('regional-indicator-odd 'regional-indicator)
|
||||
(values 'regioinal-indicator-even 'no-boundary))
|
||||
(('regional-indicator-even 'regional-indicator)
|
||||
(values 'regional-indicator-odd 'boundary))
|
||||
((_ 'regional-indicator)
|
||||
(values 'regional-indicator-odd 'boundary))
|
||||
|
||||
(else (values 'other 'other))))
|
||||
|
||||
(define (transition-state state char)
|
||||
"Given the current state and the next char, run a state transition"
|
||||
|
||||
|
||||
)
|
||||
|
||||
(define grapheme (make-grapheme glyphs width sentence-end? word-end?))
|
||||
(stream-cons grapheme (input->grapheme-stream port)))
|
||||
|
|
Loading…
Reference in a new issue