first pass untested state machine
This commit is contained in:
parent
47146c887f
commit
1692a9fc05
1 changed files with 97 additions and 3 deletions
|
@ -1,8 +1,13 @@
|
||||||
(define-module (runewidth graphemes stream)
|
(define-module (runewidth graphemes stream)
|
||||||
|
#:use-module (ice-9 match)
|
||||||
#:use-module (srfi srfi-41)
|
#:use-module (srfi srfi-41)
|
||||||
#:use-module (srfi srfi-9 gnu)
|
#:use-module (srfi srfi-9 gnu)
|
||||||
#:export (make-grapheme
|
#:export (make-grapheme
|
||||||
|
grapheme?
|
||||||
|
grapheme-glyphs
|
||||||
|
grapheme-width
|
||||||
|
grapheme-sentence-end?
|
||||||
|
grapheme-word-end?
|
||||||
input->grapheme-stream))
|
input->grapheme-stream))
|
||||||
|
|
||||||
(define-immutable-record-type <grapheme>
|
(define-immutable-record-type <grapheme>
|
||||||
|
@ -17,6 +22,95 @@
|
||||||
(unless port
|
(unless port
|
||||||
(set! port (current-input-port)))
|
(set! port (current-input-port)))
|
||||||
|
|
||||||
(define gr (make-grapheme glyphs width sentence-end? word-end?))
|
;; The first is what state we are in, and the next is the grapheme
|
||||||
|
;; property of the current character.
|
||||||
|
;; Port of https://github.com/rivo/uniseg/blob/master/graphemerules.go
|
||||||
|
(define (state-machine cur-state cur-prop)
|
||||||
|
(match (list cur-state cur-prop)
|
||||||
|
;; Grapheme boundary #3
|
||||||
|
(('carriage-return 'line-feed)
|
||||||
|
(values 'control+line-feed 'no-boundary))
|
||||||
|
|
||||||
(stream-cons c (input->grapheme-stream port)))
|
;; Grapheme boundary #4
|
||||||
|
((or ('carriage-return _)
|
||||||
|
('control+line-feed _))
|
||||||
|
(values 'any 'boundary))
|
||||||
|
|
||||||
|
;; Grapheme boundary #5
|
||||||
|
((_ 'carriage-return)
|
||||||
|
(values 'carriage-return 'boundary))
|
||||||
|
((or (_ 'line-feed)
|
||||||
|
(_ 'control))
|
||||||
|
(values 'control+line-feed 'boundary))
|
||||||
|
|
||||||
|
;; Grapheme boundary #6
|
||||||
|
(('hangul-syllable-l 'hangul-syllable-l)
|
||||||
|
(values 'hangul-syllable-l 'no-boundary))
|
||||||
|
((or ('hangul-syllable-l 'hangul-syllable-v)
|
||||||
|
('hangul-syllable-l 'hangul-syllable-lv))
|
||||||
|
(values 'hangul-syllable-lv 'no-boundary))
|
||||||
|
(('hangul-syllable-l 'hangul-syllable-lvt)
|
||||||
|
(values 'hangul-syllable-lvt 'no-boundary))
|
||||||
|
((_ 'hangul-syllable-l)
|
||||||
|
(values 'hangul-syllable-l 'boundary))
|
||||||
|
|
||||||
|
;; Grapheme boundary #7
|
||||||
|
(('hangul-syllable-lv 'hangul-syllable-v)
|
||||||
|
(values 'hangul-syllable-lv 'no-boundary))
|
||||||
|
(('hangul-syllable-lv 'hangul-syllable-t)
|
||||||
|
(values 'hangul-syllable-lvt 'no-boundary))
|
||||||
|
((or (_ 'hangul-syllable-lv)
|
||||||
|
(_ 'hangul-syllable-v))
|
||||||
|
(values 'hangul-syllable-lv 'boundary))
|
||||||
|
|
||||||
|
;; Grapheme boundary #8
|
||||||
|
((or (_ 'hangul-syllable-lvt)
|
||||||
|
(_ 'hangul-syllable-t))
|
||||||
|
(values 'hangul-syllable-lvt 'boundary))
|
||||||
|
(('hangul-syllable-lvt 'hangul-syllable-t)
|
||||||
|
(values 'hangul-syllable-lvt 'no-boundary))
|
||||||
|
|
||||||
|
;; Grapheme boundary #9
|
||||||
|
((or (_ 'extend)
|
||||||
|
(_ 'zero-width-joiner))
|
||||||
|
(values 'any 'no-boundary))
|
||||||
|
|
||||||
|
;; Grapheme boundary #9a
|
||||||
|
((_ 'spacing-mark)
|
||||||
|
(values 'any 'no-boundary))
|
||||||
|
|
||||||
|
;; Grapheme boundary #9b
|
||||||
|
(('prepend _)
|
||||||
|
(values 'any 'no-boundary))
|
||||||
|
((_ 'prepend)
|
||||||
|
(values 'prepend 'boundary))
|
||||||
|
|
||||||
|
;; Grapheme boundary #11 (emoji!)
|
||||||
|
(('extended-pictographic 'extend)
|
||||||
|
(values 'extended-pictographic 'no-boundary))
|
||||||
|
(('extended-pictographic 'zero-width-joiner)
|
||||||
|
(values 'extended-pictographic+zero-width-joiner 'no-boundary))
|
||||||
|
(('extended-pictographic+zero-width-joiner 'extended-pictographic)
|
||||||
|
(values 'extended-pictographic 'no-boundary))
|
||||||
|
((_ 'extended-pictographic)
|
||||||
|
(values 'extended-pictographic 'boundary))
|
||||||
|
|
||||||
|
|
||||||
|
;; Grapheme boundaries #12 and #13
|
||||||
|
(('regional-indicator-odd 'regional-indicator)
|
||||||
|
(values 'regioinal-indicator-even 'no-boundary))
|
||||||
|
(('regional-indicator-even 'regional-indicator)
|
||||||
|
(values 'regional-indicator-odd 'boundary))
|
||||||
|
((_ 'regional-indicator)
|
||||||
|
(values 'regional-indicator-odd 'boundary))
|
||||||
|
|
||||||
|
(else (values 'other 'other))))
|
||||||
|
|
||||||
|
(define (transition-state state char)
|
||||||
|
"Given the current state and the next char, run a state transition"
|
||||||
|
|
||||||
|
|
||||||
|
)
|
||||||
|
|
||||||
|
(define grapheme (make-grapheme glyphs width sentence-end? word-end?))
|
||||||
|
(stream-cons grapheme (input->grapheme-stream port)))
|
||||||
|
|
Loading…
Reference in a new issue