Holy shit it works
This commit is contained in:
parent
1692a9fc05
commit
e53876be52
|
@ -112,8 +112,7 @@
|
|||
#:use-module (ice-9 hash-table)
|
||||
#:use-module (srfi srfi-1)
|
||||
#:use-module (uniseg internal)
|
||||
#:export
|
||||
(,@ea-symbol-names
|
||||
#:export (,@ea-symbol-names
|
||||
eastasian-charsets)))
|
||||
|
||||
(pretty-print
|
||||
|
|
|
@ -27,9 +27,38 @@
|
|||
(define-peg-pattern @emoji-line body
|
||||
(and @emoji-datum (* @ws) @comment))
|
||||
|
||||
(define emoji-list '())
|
||||
(define emoji-ht (make-hash-table 5))
|
||||
|
||||
(define emoji-sets
|
||||
'(emoji
|
||||
emoji-presentation
|
||||
emoji-modifier
|
||||
emoji-modifier-base
|
||||
emoji-component
|
||||
emoji-extended-pictographic))
|
||||
|
||||
(define emoji-symbol-names
|
||||
(map
|
||||
(λ (set)
|
||||
(string->symbol
|
||||
(string-concatenate
|
||||
(list "char-set:"
|
||||
(symbol->string set)))))
|
||||
emoji-sets))
|
||||
|
||||
(define emoji-sets-and-symbols
|
||||
(zip emoji-sets emoji-symbol-names))
|
||||
|
||||
(define (process-emoji-line line)
|
||||
(define (string->category str)
|
||||
(match str
|
||||
("Emoji" 'emoji)
|
||||
("Emoji_Presentation" 'emoji-presentation)
|
||||
("Emoji_Modifier" 'emoji-modifier)
|
||||
("Emoji_Modifier_Base" 'emoji-modifier-base)
|
||||
("Emoji_Component" 'emoji-component)
|
||||
("Extended_Pictographic" 'emoji-extended-pictographic)))
|
||||
|
||||
(define tree (peg:tree (match-pattern @emoji-line line)))
|
||||
|
||||
(unless (or (not tree)
|
||||
|
@ -39,7 +68,7 @@
|
|||
(match tree
|
||||
(((('@codepoint-range
|
||||
('@codepoint codepoints) ...)
|
||||
('@emoji-category category))
|
||||
('@emoji-category cat-str))
|
||||
('@comment comment))
|
||||
|
||||
(with-exception-handler
|
||||
|
@ -48,15 +77,14 @@
|
|||
(format-exception-msg stdout err))
|
||||
(λ ()
|
||||
(let ((f (hex-string->integer (first codepoints)))
|
||||
(l (hex-string->integer (last codepoints))))
|
||||
(l (hex-string->integer (last codepoints)))
|
||||
(category (string->category cat-str)))
|
||||
|
||||
(when (or (in-surrogate-range f)
|
||||
(in-surrogate-range l))
|
||||
(error (format #f "chars in surrogate range ~x -> ~x" f l)))
|
||||
|
||||
(when (and (equal? "Extended_Pictographic" category)
|
||||
(> l #xFF))
|
||||
(set! emoji-list (cons (list f l) emoji-list)))))
|
||||
(cons-hash-list! emoji-ht category f l)))
|
||||
#:unwind? #t)))))
|
||||
|
||||
(define line-func
|
||||
|
@ -78,27 +106,45 @@
|
|||
|
||||
(pretty-print
|
||||
`(define-module (uniseg emoji)
|
||||
#:use-module (uniseg internal)
|
||||
#:use-module (ice-9 hash-table)
|
||||
#:use-module (srfi srfi-1)
|
||||
#:export (char-set:extended-pictographic)))
|
||||
#:export (,@emoji-symbol-names
|
||||
emoji-charsets)))
|
||||
|
||||
(pretty-print
|
||||
`(define emoji-list ',emoji-list))
|
||||
`(define emoji-ht
|
||||
(alist->hashq-table ',(hash-map->list cons emoji-ht))))
|
||||
|
||||
(display "\n")
|
||||
|
||||
(for-each
|
||||
(λ (sym)
|
||||
(pretty-print
|
||||
`(define ,sym (char-set))))
|
||||
emoji-symbol-names)
|
||||
|
||||
(display "\n")
|
||||
|
||||
(pretty-print
|
||||
`(define char-set:extended-pictographic (char-set)))
|
||||
|
||||
(display "\n")
|
||||
|
||||
(pretty-print
|
||||
`(for-each
|
||||
`(define emoji-charsets
|
||||
(list
|
||||
,@(map
|
||||
(λ (pair)
|
||||
(ucs-range->char-set!
|
||||
(first pair)
|
||||
(+ 1 (second pair))
|
||||
#t char-set:extended-pictographic))
|
||||
emoji-list))
|
||||
(let ((f (first pair))
|
||||
(s (second pair)))
|
||||
`(list ',f ,s)))
|
||||
emoji-sets-and-symbols))))
|
||||
|
||||
(display "\n")
|
||||
|
||||
(for-each
|
||||
(λ (set-pair)
|
||||
(let ((name (first set-pair))
|
||||
(symbol (second set-pair)))
|
||||
(pretty-print
|
||||
`(ranges->charset! emoji-ht ',name ,symbol))))
|
||||
emoji-sets-and-symbols)
|
||||
|
||||
(display "Code generation complete.\n" stdout)))
|
||||
|
||||
|
|
|
@ -42,7 +42,7 @@
|
|||
extend
|
||||
regional-indicator
|
||||
spacing-mark
|
||||
zerowidth-joiner))
|
||||
zero-width-joiner))
|
||||
|
||||
(define grapheme-symbol-names
|
||||
(map
|
||||
|
@ -71,7 +71,7 @@
|
|||
("Extend" 'extend)
|
||||
("Regional_Indicator" 'regional-indicator)
|
||||
("SpacingMark" 'spacing-mark)
|
||||
("ZWJ" 'zerowidth-joiner)))
|
||||
("ZWJ" 'zero-width-joiner)))
|
||||
|
||||
(define tree (peg:tree (match-pattern @grapheme-line line)))
|
||||
|
||||
|
@ -123,7 +123,7 @@
|
|||
#:use-module (ice-9 hash-table)
|
||||
#:use-module (srfi srfi-1)
|
||||
#:use-module (uniseg internal)
|
||||
#:export ( ,@grapheme-symbol-names
|
||||
#:export (,@grapheme-symbol-names
|
||||
grapheme-charsets)))
|
||||
|
||||
|
||||
|
|
60
uniseg.scm
60
uniseg.scm
|
@ -1,17 +1,21 @@
|
|||
(define-module (uniseg)
|
||||
#:use-module (srfi srfi-1)
|
||||
#:use-module (ice-9 match)
|
||||
#:use-module (srfi srfi-41)
|
||||
#:use-module (uniseg emoji)
|
||||
#:use-module (uniseg graphemes)
|
||||
#:use-module (uniseg graphemes stream)
|
||||
#:use-module (uniseg eastasian)
|
||||
#:export (emoji?
|
||||
grapheme-property
|
||||
eastasian-property))
|
||||
char->grapheme-property
|
||||
char->eastasian-property
|
||||
char-width
|
||||
string-width))
|
||||
|
||||
(define (emoji? char)
|
||||
(char-set-contains? char-set:extended-pictographic char))
|
||||
(char-set-contains? char-set:emoji-extended-pictographic char))
|
||||
|
||||
(define (get-prop sets char)
|
||||
(define (char->prop sets char)
|
||||
(let ((pair
|
||||
(find
|
||||
(λ (p) (char-set-contains? (second p) char))
|
||||
|
@ -19,12 +23,12 @@
|
|||
(and pair
|
||||
(first pair))))
|
||||
|
||||
(define (grapheme-property char)
|
||||
(define (char->grapheme-property char)
|
||||
"Find the Unicode grapheme cluster property, as defined by https://www.unicode.org/reports/tr44/"
|
||||
(or (get-prop grapheme-charsets char)
|
||||
(or (char->prop grapheme-charsets char)
|
||||
'other))
|
||||
|
||||
(define (eastasian-property char)
|
||||
(define (char->eastasian-property char)
|
||||
"Find the Unicode East Asian width property, as defined by https://www.unicode.org/reports/tr11/"
|
||||
(define (<==> lo hi)
|
||||
"Create a range predicate, inclusive between two numbers"
|
||||
|
@ -47,7 +51,7 @@
|
|||
(define range-plane-3?
|
||||
(<==> #x30000 #x3FFFD))
|
||||
|
||||
(or (get-prop eastasian-charsets char)
|
||||
(or (char->prop eastasian-charsets char)
|
||||
(let ((int (char->integer char)))
|
||||
(match int
|
||||
((or (? range-unified-ideographs-ext-a?)
|
||||
|
@ -58,3 +62,43 @@
|
|||
'doublewidth)
|
||||
(else
|
||||
'neutral)))))
|
||||
|
||||
(define (char-width rune)
|
||||
"Convert from a character to its visible width. Returns the width as well as the grapheme property to reduce calculations"
|
||||
(define grapheme-prop (char->grapheme-property rune))
|
||||
(values
|
||||
(or
|
||||
(match grapheme-prop
|
||||
((or 'control
|
||||
'carriage-return
|
||||
'line-feed
|
||||
'extend
|
||||
'zero-width-joiner)
|
||||
0)
|
||||
('regional-indicator
|
||||
2)
|
||||
('extended-pictographic
|
||||
(if
|
||||
(char-set-contains? char-set:emoji-presentation rune)
|
||||
2
|
||||
1))
|
||||
(else #f)
|
||||
)
|
||||
(match (char->integer rune)
|
||||
(#x2E3A 3)
|
||||
(#x2E3B 4)
|
||||
(else #f))
|
||||
(match (char->eastasian-property rune)
|
||||
('doublewidth 2)
|
||||
;; Default for monospace is to render ambiguous as 1
|
||||
('ambiguous 1)
|
||||
(else 1)))
|
||||
grapheme-prop))
|
||||
|
||||
(define (string-width str)
|
||||
"Get the width of a string by adding up the widths of each grapheme"
|
||||
(stream-fold
|
||||
(λ (val grapheme)
|
||||
(+ val (grapheme-width grapheme)))
|
||||
0
|
||||
(string->grapheme-stream str)))
|
||||
|
|
1182
uniseg/eastasian.scm
1182
uniseg/eastasian.scm
File diff suppressed because it is too large
Load Diff
774
uniseg/emoji.scm
774
uniseg/emoji.scm
|
@ -3,11 +3,728 @@
|
|||
(define-module
|
||||
(uniseg emoji)
|
||||
#:use-module
|
||||
(uniseg internal)
|
||||
#:use-module
|
||||
(ice-9 hash-table)
|
||||
#:use-module
|
||||
(srfi srfi-1)
|
||||
#:export
|
||||
(char-set:extended-pictographic))
|
||||
(define emoji-list
|
||||
'((130048 131069)
|
||||
(char-set:emoji
|
||||
char-set:emoji-presentation
|
||||
char-set:emoji-modifier
|
||||
char-set:emoji-modifier-base
|
||||
char-set:emoji-component
|
||||
char-set:emoji-extended-pictographic
|
||||
emoji-charsets))
|
||||
(define emoji-ht
|
||||
(alist->hashq-table
|
||||
'((emoji-modifier (127995 127999))
|
||||
(emoji-component
|
||||
(917536 917631)
|
||||
(129456 129459)
|
||||
(127995 127999)
|
||||
(127462 127487)
|
||||
(65039 65039)
|
||||
(8419 8419)
|
||||
(8205 8205)
|
||||
(48 57)
|
||||
(42 42)
|
||||
(35 35))
|
||||
(emoji-modifier-base
|
||||
(129489 129501)
|
||||
(129485 129487)
|
||||
(129467 129467)
|
||||
(129464 129465)
|
||||
(129461 129462)
|
||||
(129399 129399)
|
||||
(129340 129342)
|
||||
(129331 129337)
|
||||
(129329 129330)
|
||||
(129328 129328)
|
||||
(129318 129318)
|
||||
(129311 129311)
|
||||
(129305 129310)
|
||||
(129304 129304)
|
||||
(129295 129295)
|
||||
(129292 129292)
|
||||
(128716 128716)
|
||||
(128704 128704)
|
||||
(128694 128694)
|
||||
(128692 128693)
|
||||
(128675 128675)
|
||||
(128587 128591)
|
||||
(128581 128583)
|
||||
(128405 128406)
|
||||
(128400 128400)
|
||||
(128378 128378)
|
||||
(128372 128373)
|
||||
(128170 128170)
|
||||
(128145 128145)
|
||||
(128143 128143)
|
||||
(128133 128135)
|
||||
(128129 128131)
|
||||
(128124 128124)
|
||||
(128110 128120)
|
||||
(128108 128109)
|
||||
(128102 128107)
|
||||
(128070 128080)
|
||||
(128066 128067)
|
||||
(127947 127948)
|
||||
(127946 127946)
|
||||
(127943 127943)
|
||||
(127938 127940)
|
||||
(127877 127877)
|
||||
(9997 9997)
|
||||
(9994 9996)
|
||||
(9977 9977)
|
||||
(9757 9757))
|
||||
(emoji (129744 129750)
|
||||
(129728 129730)
|
||||
(129712 129718)
|
||||
(129686 129704)
|
||||
(129680 129685)
|
||||
(129667 129670)
|
||||
(129664 129666)
|
||||
(129656 129658)
|
||||
(129652 129652)
|
||||
(129648 129651)
|
||||
(129511 129535)
|
||||
(129488 129510)
|
||||
(129485 129487)
|
||||
(129483 129483)
|
||||
(129475 129482)
|
||||
(129473 129474)
|
||||
(129472 129472)
|
||||
(129466 129471)
|
||||
(129456 129465)
|
||||
(129454 129455)
|
||||
(129451 129453)
|
||||
(129445 129450)
|
||||
(129443 129444)
|
||||
(129432 129442)
|
||||
(129426 129431)
|
||||
(129413 129425)
|
||||
(129408 129412)
|
||||
(129404 129407)
|
||||
(129403 129403)
|
||||
(129402 129402)
|
||||
(129399 129400)
|
||||
(129395 129398)
|
||||
(129394 129394)
|
||||
(129393 129393)
|
||||
(129388 129392)
|
||||
(129375 129387)
|
||||
(129360 129374)
|
||||
(129357 129359)
|
||||
(129356 129356)
|
||||
(129351 129355)
|
||||
(129344 129349)
|
||||
(129343 129343)
|
||||
(129340 129342)
|
||||
(129331 129338)
|
||||
(129329 129330)
|
||||
(129328 129328)
|
||||
(129320 129327)
|
||||
(129312 129319)
|
||||
(129311 129311)
|
||||
(129305 129310)
|
||||
(129296 129304)
|
||||
(129293 129295)
|
||||
(129292 129292)
|
||||
(128992 129003)
|
||||
(128763 128764)
|
||||
(128762 128762)
|
||||
(128761 128761)
|
||||
(128759 128760)
|
||||
(128756 128758)
|
||||
(128755 128755)
|
||||
(128752 128752)
|
||||
(128747 128748)
|
||||
(128745 128745)
|
||||
(128736 128741)
|
||||
(128726 128727)
|
||||
(128725 128725)
|
||||
(128721 128722)
|
||||
(128720 128720)
|
||||
(128717 128719)
|
||||
(128716 128716)
|
||||
(128715 128715)
|
||||
(128705 128709)
|
||||
(128704 128704)
|
||||
(128703 128703)
|
||||
(128697 128702)
|
||||
(128695 128696)
|
||||
(128694 128694)
|
||||
(128691 128693)
|
||||
(128690 128690)
|
||||
(128686 128689)
|
||||
(128679 128685)
|
||||
(128678 128678)
|
||||
(128676 128677)
|
||||
(128675 128675)
|
||||
(128674 128674)
|
||||
(128667 128673)
|
||||
(128665 128666)
|
||||
(128664 128664)
|
||||
(128663 128663)
|
||||
(128662 128662)
|
||||
(128661 128661)
|
||||
(128660 128660)
|
||||
(128657 128659)
|
||||
(128656 128656)
|
||||
(128655 128655)
|
||||
(128654 128654)
|
||||
(128653 128653)
|
||||
(128652 128652)
|
||||
(128650 128651)
|
||||
(128649 128649)
|
||||
(128648 128648)
|
||||
(128647 128647)
|
||||
(128646 128646)
|
||||
(128643 128645)
|
||||
(128641 128642)
|
||||
(128640 128640)
|
||||
(128581 128591)
|
||||
(128577 128580)
|
||||
(128567 128576)
|
||||
(128566 128566)
|
||||
(128565 128565)
|
||||
(128564 128564)
|
||||
(128560 128563)
|
||||
(128558 128559)
|
||||
(128557 128557)
|
||||
(128556 128556)
|
||||
(128552 128555)
|
||||
(128550 128551)
|
||||
(128544 128549)
|
||||
(128543 128543)
|
||||
(128540 128542)
|
||||
(128539 128539)
|
||||
(128538 128538)
|
||||
(128537 128537)
|
||||
(128536 128536)
|
||||
(128535 128535)
|
||||
(128534 128534)
|
||||
(128533 128533)
|
||||
(128530 128532)
|
||||
(128529 128529)
|
||||
(128528 128528)
|
||||
(128527 128527)
|
||||
(128526 128526)
|
||||
(128521 128525)
|
||||
(128519 128520)
|
||||
(128513 128518)
|
||||
(128512 128512)
|
||||
(128507 128511)
|
||||
(128506 128506)
|
||||
(128499 128499)
|
||||
(128495 128495)
|
||||
(128488 128488)
|
||||
(128483 128483)
|
||||
(128481 128481)
|
||||
(128476 128478)
|
||||
(128465 128467)
|
||||
(128450 128452)
|
||||
(128444 128444)
|
||||
(128433 128434)
|
||||
(128424 128424)
|
||||
(128421 128421)
|
||||
(128420 128420)
|
||||
(128405 128406)
|
||||
(128400 128400)
|
||||
(128394 128397)
|
||||
(128391 128391)
|
||||
(128378 128378)
|
||||
(128371 128377)
|
||||
(128367 128368)
|
||||
(128348 128359)
|
||||
(128336 128347)
|
||||
(128331 128334)
|
||||
(128329 128330)
|
||||
(128302 128317)
|
||||
(128300 128301)
|
||||
(128278 128299)
|
||||
(128277 128277)
|
||||
(128266 128276)
|
||||
(128265 128265)
|
||||
(128264 128264)
|
||||
(128260 128263)
|
||||
(128259 128259)
|
||||
(128255 128258)
|
||||
(128253 128253)
|
||||
(128249 128252)
|
||||
(128248 128248)
|
||||
(128246 128247)
|
||||
(128245 128245)
|
||||
(128240 128244)
|
||||
(128239 128239)
|
||||
(128238 128238)
|
||||
(128236 128237)
|
||||
(128184 128235)
|
||||
(128182 128183)
|
||||
(128174 128181)
|
||||
(128173 128173)
|
||||
(128110 128172)
|
||||
(128108 128109)
|
||||
(128102 128107)
|
||||
(128101 128101)
|
||||
(128066 128100)
|
||||
(128065 128065)
|
||||
(128064 128064)
|
||||
(128063 128063)
|
||||
(128043 128062)
|
||||
(128042 128042)
|
||||
(128023 128041)
|
||||
(128022 128022)
|
||||
(128021 128021)
|
||||
(128020 128020)
|
||||
(128019 128019)
|
||||
(128017 128018)
|
||||
(128015 128016)
|
||||
(128012 128014)
|
||||
(128009 128011)
|
||||
(128008 128008)
|
||||
(127992 128007)
|
||||
(127991 127991)
|
||||
(127989 127989)
|
||||
(127988 127988)
|
||||
(127987 127987)
|
||||
(127973 127984)
|
||||
(127972 127972)
|
||||
(127968 127971)
|
||||
(127956 127967)
|
||||
(127951 127955)
|
||||
(127947 127950)
|
||||
(127946 127946)
|
||||
(127945 127945)
|
||||
(127944 127944)
|
||||
(127943 127943)
|
||||
(127942 127942)
|
||||
(127941 127941)
|
||||
(127904 127940)
|
||||
(127902 127903)
|
||||
(127897 127899)
|
||||
(127894 127895)
|
||||
(127872 127891)
|
||||
(127870 127871)
|
||||
(127869 127869)
|
||||
(127868 127868)
|
||||
(127825 127867)
|
||||
(127824 127824)
|
||||
(127820 127823)
|
||||
(127819 127819)
|
||||
(127799 127818)
|
||||
(127798 127798)
|
||||
(127796 127797)
|
||||
(127794 127795)
|
||||
(127792 127793)
|
||||
(127789 127791)
|
||||
(127780 127788)
|
||||
(127777 127777)
|
||||
(127775 127776)
|
||||
(127773 127774)
|
||||
(127772 127772)
|
||||
(127771 127771)
|
||||
(127770 127770)
|
||||
(127769 127769)
|
||||
(127766 127768)
|
||||
(127763 127765)
|
||||
(127762 127762)
|
||||
(127761 127761)
|
||||
(127760 127760)
|
||||
(127759 127759)
|
||||
(127757 127758)
|
||||
(127744 127756)
|
||||
(127568 127569)
|
||||
(127538 127546)
|
||||
(127535 127535)
|
||||
(127514 127514)
|
||||
(127489 127490)
|
||||
(127462 127487)
|
||||
(127377 127386)
|
||||
(127374 127374)
|
||||
(127358 127359)
|
||||
(127344 127345)
|
||||
(127183 127183)
|
||||
(126980 126980)
|
||||
(12953 12953)
|
||||
(12951 12951)
|
||||
(12349 12349)
|
||||
(12336 12336)
|
||||
(11093 11093)
|
||||
(11088 11088)
|
||||
(11035 11036)
|
||||
(11013 11015)
|
||||
(10548 10549)
|
||||
(10175 10175)
|
||||
(10160 10160)
|
||||
(10145 10145)
|
||||
(10133 10135)
|
||||
(10084 10084)
|
||||
(10083 10083)
|
||||
(10071 10071)
|
||||
(10067 10069)
|
||||
(10062 10062)
|
||||
(10060 10060)
|
||||
(10055 10055)
|
||||
(10052 10052)
|
||||
(10035 10036)
|
||||
(10024 10024)
|
||||
(10017 10017)
|
||||
(10013 10013)
|
||||
(10006 10006)
|
||||
(10004 10004)
|
||||
(10002 10002)
|
||||
(9999 9999)
|
||||
(9997 9997)
|
||||
(9992 9996)
|
||||
(9989 9989)
|
||||
(9986 9986)
|
||||
(9981 9981)
|
||||
(9978 9978)
|
||||
(9975 9977)
|
||||
(9973 9973)
|
||||
(9972 9972)
|
||||
(9970 9971)
|
||||
(9968 9969)
|
||||
(9962 9962)
|
||||
(9961 9961)
|
||||
(9940 9940)
|
||||
(9939 9939)
|
||||
(9937 9937)
|
||||
(9935 9935)
|
||||
(9934 9934)
|
||||
(9928 9928)
|
||||
(9924 9925)
|
||||
(9917 9918)
|
||||
(9904 9905)
|
||||
(9898 9899)
|
||||
(9895 9895)
|
||||
(9888 9889)
|
||||
(9883 9884)
|
||||
(9881 9881)
|
||||
(9878 9879)
|
||||
(9877 9877)
|
||||
(9876 9876)
|
||||
(9875 9875)
|
||||
(9874 9874)
|
||||
(9855 9855)
|
||||
(9854 9854)
|
||||
(9851 9851)
|
||||
(9832 9832)
|
||||
(9829 9830)
|
||||
(9827 9827)
|
||||
(9824 9824)
|
||||
(9823 9823)
|
||||
(9800 9811)
|
||||
(9794 9794)
|
||||
(9792 9792)
|
||||
(9786 9786)
|
||||
(9784 9785)
|
||||
(9775 9775)
|
||||
(9774 9774)
|
||||
(9770 9770)
|
||||
(9766 9766)
|
||||
(9762 9763)
|
||||
(9760 9760)
|
||||
(9757 9757)
|
||||
(9752 9752)
|
||||
(9748 9749)
|
||||
(9745 9745)
|
||||
(9742 9742)
|
||||
(9732 9732)
|
||||
(9730 9731)
|
||||
(9728 9729)
|
||||
(9723 9726)
|
||||
(9664 9664)
|
||||
(9654 9654)
|
||||
(9642 9643)
|
||||
(9410 9410)
|
||||
(9208 9210)
|
||||
(9203 9203)
|
||||
(9201 9202)
|
||||
(9200 9200)
|
||||
(9199 9199)
|
||||
(9197 9198)
|
||||
(9193 9196)
|
||||
(9167 9167)
|
||||
(9000 9000)
|
||||
(8986 8987)
|
||||
(8617 8618)
|
||||
(8596 8601)
|
||||
(8505 8505)
|
||||
(8482 8482)
|
||||
(8265 8265)
|
||||
(8252 8252)
|
||||
(174 174)
|
||||
(169 169)
|
||||
(48 57)
|
||||
(42 42)
|
||||
(35 35))
|
||||
(emoji-presentation
|
||||
(129744 129750)
|
||||
(129728 129730)
|
||||
(129712 129718)
|
||||
(129686 129704)
|
||||
(129680 129685)
|
||||
(129667 129670)
|
||||
(129664 129666)
|
||||
(129656 129658)
|
||||
(129652 129652)
|
||||
(129648 129651)
|
||||
(129511 129535)
|
||||
(129488 129510)
|
||||
(129485 129487)
|
||||
(129483 129483)
|
||||
(129475 129482)
|
||||
(129473 129474)
|
||||
(129472 129472)
|
||||
(129466 129471)
|
||||
(129456 129465)
|
||||
(129454 129455)
|
||||
(129451 129453)
|
||||
(129445 129450)
|
||||
(129443 129444)
|
||||
(129432 129442)
|
||||
(129426 129431)
|
||||
(129413 129425)
|
||||
(129408 129412)
|
||||
(129404 129407)
|
||||
(129403 129403)
|
||||
(129402 129402)
|
||||
(129399 129400)
|
||||
(129395 129398)
|
||||
(129394 129394)
|
||||
(129393 129393)
|
||||
(129388 129392)
|
||||
(129375 129387)
|
||||
(129360 129374)
|
||||
(129357 129359)
|
||||
(129356 129356)
|
||||
(129351 129355)
|
||||
(129344 129349)
|
||||
(129343 129343)
|
||||
(129340 129342)
|
||||
(129331 129338)
|
||||
(129329 129330)
|
||||
(129328 129328)
|
||||
(129320 129327)
|
||||
(129312 129319)
|
||||
(129311 129311)
|
||||
(129305 129310)
|
||||
(129296 129304)
|
||||
(129293 129295)
|
||||
(129292 129292)
|
||||
(128992 129003)
|
||||
(128763 128764)
|
||||
(128762 128762)
|
||||
(128761 128761)
|
||||
(128759 128760)
|
||||
(128756 128758)
|
||||
(128747 128748)
|
||||
(128726 128727)
|
||||
(128725 128725)
|
||||
(128721 128722)
|
||||
(128720 128720)
|
||||
(128716 128716)
|
||||
(128705 128709)
|
||||
(128704 128704)
|
||||
(128703 128703)
|
||||
(128697 128702)
|
||||
(128695 128696)
|
||||
(128694 128694)
|
||||
(128691 128693)
|
||||
(128690 128690)
|
||||
(128686 128689)
|
||||
(128679 128685)
|
||||
(128678 128678)
|
||||
(128676 128677)
|
||||
(128675 128675)
|
||||
(128674 128674)
|
||||
(128667 128673)
|
||||
(128665 128666)
|
||||
(128664 128664)
|
||||
(128663 128663)
|
||||
(128662 128662)
|
||||
(128661 128661)
|
||||
(128660 128660)
|
||||
(128657 128659)
|
||||
(128656 128656)
|
||||
(128655 128655)
|
||||
(128654 128654)
|
||||
(128653 128653)
|
||||
(128652 128652)
|
||||
(128650 128651)
|
||||
(128649 128649)
|
||||
(128648 128648)
|
||||
(128647 128647)
|
||||
(128646 128646)
|
||||
(128643 128645)
|
||||
(128641 128642)
|
||||
(128640 128640)
|
||||
(128581 128591)
|
||||
(128577 128580)
|
||||
(128567 128576)
|
||||
(128566 128566)
|
||||
(128565 128565)
|
||||
(128564 128564)
|
||||
(128560 128563)
|
||||
(128558 128559)
|
||||
(128557 128557)
|
||||
(128556 128556)
|
||||
(128552 128555)
|
||||
(128550 128551)
|
||||
(128544 128549)
|
||||
(128543 128543)
|
||||
(128540 128542)
|
||||
(128539 128539)
|
||||
(128538 128538)
|
||||
(128537 128537)
|
||||
(128536 128536)
|
||||
(128535 128535)
|
||||
(128534 128534)
|
||||
(128533 128533)
|
||||
(128530 128532)
|
||||
(128529 128529)
|
||||
(128528 128528)
|
||||
(128527 128527)
|
||||
(128526 128526)
|
||||
(128521 128525)
|
||||
(128519 128520)
|
||||
(128513 128518)
|
||||
(128512 128512)
|
||||
(128507 128511)
|
||||
(128420 128420)
|
||||
(128405 128406)
|
||||
(128378 128378)
|
||||
(128348 128359)
|
||||
(128336 128347)
|
||||
(128331 128334)
|
||||
(128302 128317)
|
||||
(128300 128301)
|
||||
(128278 128299)
|
||||
(128277 128277)
|
||||
(128266 128276)
|
||||
(128265 128265)
|
||||
(128264 128264)
|
||||
(128260 128263)
|
||||
(128259 128259)
|
||||
(128255 128258)
|
||||
(128249 128252)
|
||||
(128248 128248)
|
||||
(128246 128247)
|
||||
(128245 128245)
|
||||
(128240 128244)
|
||||
(128239 128239)
|
||||
(128238 128238)
|
||||
(128236 128237)
|
||||
(128184 128235)
|
||||
(128182 128183)
|
||||
(128174 128181)
|
||||
(128173 128173)
|
||||
(128110 128172)
|
||||
(128108 128109)
|
||||
(128102 128107)
|
||||
(128101 128101)
|
||||
(128066 128100)
|
||||
(128064 128064)
|
||||
(128043 128062)
|
||||
(128042 128042)
|
||||
(128023 128041)
|
||||
(128022 128022)
|
||||
(128021 128021)
|
||||
(128020 128020)
|
||||
(128019 128019)
|
||||
(128017 128018)
|
||||
(128015 128016)
|
||||
(128012 128014)
|
||||
(128009 128011)
|
||||
(128008 128008)
|
||||
(127992 128007)
|
||||
(127988 127988)
|
||||
(127973 127984)
|
||||
(127972 127972)
|
||||
(127968 127971)
|
||||
(127951 127955)
|
||||
(127946 127946)
|
||||
(127945 127945)
|
||||
(127944 127944)
|
||||
(127943 127943)
|
||||
(127942 127942)
|
||||
(127941 127941)
|
||||
(127904 127940)
|
||||
(127872 127891)
|
||||
(127870 127871)
|
||||
(127868 127868)
|
||||
(127825 127867)
|
||||
(127824 127824)
|
||||
(127820 127823)
|
||||
(127819 127819)
|
||||
(127799 127818)
|
||||
(127796 127797)
|
||||
(127794 127795)
|
||||
(127792 127793)
|
||||
(127789 127791)
|
||||
(127775 127776)
|
||||
(127773 127774)
|
||||
(127772 127772)
|
||||
(127771 127771)
|
||||
(127770 127770)
|
||||
(127769 127769)
|
||||
(127766 127768)
|
||||
(127763 127765)
|
||||
(127762 127762)
|
||||
(127761 127761)
|
||||
(127760 127760)
|
||||
(127759 127759)
|
||||
(127757 127758)
|
||||
(127744 127756)
|
||||
(127568 127569)
|
||||
(127544 127546)
|
||||
(127538 127542)
|
||||
(127535 127535)
|
||||
(127514 127514)
|
||||
(127489 127489)
|
||||
(127462 127487)
|
||||
(127377 127386)
|
||||
(127374 127374)
|
||||
(127183 127183)
|
||||
(126980 126980)
|
||||
(11093 11093)
|
||||
(11088 11088)
|
||||
(11035 11036)
|
||||
(10175 10175)
|
||||
(10160 10160)
|
||||
(10133 10135)
|
||||
(10071 10071)
|
||||
(10067 10069)
|
||||
(10062 10062)
|
||||
(10060 10060)
|
||||
(10024 10024)
|
||||
(9994 9995)
|
||||
(9989 9989)
|
||||
(9981 9981)
|
||||
(9978 9978)
|
||||
(9973 9973)
|
||||
(9970 9971)
|
||||
(9962 9962)
|
||||
(9940 9940)
|
||||
(9934 9934)
|
||||
(9924 9925)
|
||||
(9917 9918)
|
||||
(9898 9899)
|
||||
(9889 9889)
|
||||
(9875 9875)
|
||||
(9855 9855)
|
||||
(9800 9811)
|
||||
(9748 9749)
|
||||
(9725 9726)
|
||||
(9203 9203)
|
||||
(9200 9200)
|
||||
(9193 9196)
|
||||
(8986 8987))
|
||||
(emoji-extended-pictographic
|
||||
(130048 131069)
|
||||
(129751 129791)
|
||||
(129744 129750)
|
||||
(129731 129743)
|
||||
|
@ -495,16 +1212,47 @@
|
|||
(8505 8505)
|
||||
(8482 8482)
|
||||
(8265 8265)
|
||||
(8252 8252)))
|
||||
(8252 8252)
|
||||
(174 174)
|
||||
(169 169)))))
|
||||
|
||||
(define char-set:extended-pictographic
|
||||
(define char-set:emoji (char-set))
|
||||
(define char-set:emoji-presentation (char-set))
|
||||
(define char-set:emoji-modifier (char-set))
|
||||
(define char-set:emoji-modifier-base (char-set))
|
||||
(define char-set:emoji-component (char-set))
|
||||
(define char-set:emoji-extended-pictographic
|
||||
(char-set))
|
||||
|
||||
(for-each
|
||||
(λ (pair)
|
||||
(ucs-range->char-set!
|
||||
(first pair)
|
||||
(+ 1 (second pair))
|
||||
#t
|
||||
char-set:extended-pictographic))
|
||||
emoji-list)
|
||||
(define emoji-charsets
|
||||
(list (list 'emoji char-set:emoji)
|
||||
(list 'emoji-presentation
|
||||
char-set:emoji-presentation)
|
||||
(list 'emoji-modifier char-set:emoji-modifier)
|
||||
(list 'emoji-modifier-base
|
||||
char-set:emoji-modifier-base)
|
||||
(list 'emoji-component char-set:emoji-component)
|
||||
(list 'emoji-extended-pictographic
|
||||
char-set:emoji-extended-pictographic)))
|
||||
|
||||
(ranges->charset! emoji-ht 'emoji char-set:emoji)
|
||||
(ranges->charset!
|
||||
emoji-ht
|
||||
'emoji-presentation
|
||||
char-set:emoji-presentation)
|
||||
(ranges->charset!
|
||||
emoji-ht
|
||||
'emoji-modifier
|
||||
char-set:emoji-modifier)
|
||||
(ranges->charset!
|
||||
emoji-ht
|
||||
'emoji-modifier-base
|
||||
char-set:emoji-modifier-base)
|
||||
(ranges->charset!
|
||||
emoji-ht
|
||||
'emoji-component
|
||||
char-set:emoji-component)
|
||||
(ranges->charset!
|
||||
emoji-ht
|
||||
'emoji-extended-pictographic
|
||||
char-set:emoji-extended-pictographic)
|
||||
|
|
|
@ -20,28 +20,176 @@
|
|||
char-set:grapheme-extend
|
||||
char-set:grapheme-regional-indicator
|
||||
char-set:grapheme-spacing-mark
|
||||
char-set:grapheme-zerowidth-joiner
|
||||
char-set:grapheme-zero-width-joiner
|
||||
grapheme-charsets))
|
||||
(define grapheme-ht
|
||||
(alist->hashq-table
|
||||
'((prepend
|
||||
(73474 73474)
|
||||
(73030 73030)
|
||||
(72324 72329)
|
||||
(72250 72250)
|
||||
(72001 72001)
|
||||
(71999 71999)
|
||||
(70082 70083)
|
||||
(69837 69837)
|
||||
(69821 69821)
|
||||
(3406 3406)
|
||||
(2274 2274)
|
||||
(2192 2193)
|
||||
(1807 1807)
|
||||
(1757 1757)
|
||||
(1536 1541))
|
||||
(regional-indicator (127462 127487))
|
||||
(carriage-return (13 13))
|
||||
'((spacing-mark
|
||||
(119149 119149)
|
||||
(119142 119142)
|
||||
(94192 94193)
|
||||
(94033 94087)
|
||||
(73537 73537)
|
||||
(73534 73535)
|
||||
(73524 73525)
|
||||
(73475 73475)
|
||||
(73461 73462)
|
||||
(73110 73110)
|
||||
(73107 73108)
|
||||
(73098 73102)
|
||||
(72884 72884)
|
||||
(72881 72881)
|
||||
(72873 72873)
|
||||
(72766 72766)
|
||||
(72751 72751)
|
||||
(72343 72343)
|
||||
(72279 72280)
|
||||
(72249 72249)
|
||||
(72164 72164)
|
||||
(72156 72159)
|
||||
(72145 72147)
|
||||
(72002 72002)
|
||||
(72000 72000)
|
||||
(71997 71997)
|
||||
(71991 71992)
|
||||
(71985 71989)
|
||||
(71736 71736)
|
||||
(71724 71726)
|
||||
(71462 71462)
|
||||
(71350 71350)
|
||||
(71342 71343)
|
||||
(71340 71340)
|
||||
(71230 71230)
|
||||
(71227 71228)
|
||||
(71216 71218)
|
||||
(71102 71102)
|
||||
(71096 71099)
|
||||
(71088 71089)
|
||||
(70849 70849)
|
||||
(70846 70846)
|
||||
(70843 70844)
|
||||
(70841 70841)
|
||||
(70833 70834)
|
||||
(70725 70725)
|
||||
(70720 70721)
|
||||
(70709 70711)
|
||||
(70498 70499)
|
||||
(70475 70477)
|
||||
(70471 70472)
|
||||
(70465 70468)
|
||||
(70463 70463)
|
||||
(70402 70403)
|
||||
(70368 70370)
|
||||
(70197 70197)
|
||||
(70194 70195)
|
||||
(70188 70190)
|
||||
(70094 70094)
|
||||
(70079 70080)
|
||||
(70067 70069)
|
||||
(70018 70018)
|
||||
(69957 69958)
|
||||
(69932 69932)
|
||||
(69815 69816)
|
||||
(69808 69810)
|
||||
(69762 69762)
|
||||
(69634 69634)
|
||||
(69632 69632)
|
||||
(44012 44012)
|
||||
(44009 44010)
|
||||
(44006 44007)
|
||||
(44003 44004)
|
||||
(43765 43765)
|
||||
(43758 43759)
|
||||
(43755 43755)
|
||||
(43597 43597)
|
||||
(43571 43572)
|
||||
(43567 43568)
|
||||
(43454 43456)
|
||||
(43450 43451)
|
||||
(43444 43445)
|
||||
(43395 43395)
|
||||
(43346 43347)
|
||||
(43188 43203)
|
||||
(43136 43137)
|
||||
(43047 43047)
|
||||
(43043 43044)
|
||||
(7415 7415)
|
||||
(7393 7393)
|
||||
(7220 7221)
|
||||
(7204 7211)
|
||||
(7154 7155)
|
||||
(7150 7150)
|
||||
(7146 7148)
|
||||
(7143 7143)
|
||||
(7082 7082)
|
||||
(7078 7079)
|
||||
(7073 7073)
|
||||
(7042 7042)
|
||||
(6979 6980)
|
||||
(6973 6977)
|
||||
(6971 6971)
|
||||
(6916 6916)
|
||||
(6765 6770)
|
||||
(6743 6743)
|
||||
(6741 6741)
|
||||
(6681 6682)
|
||||
(6451 6456)
|
||||
(6448 6449)
|
||||
(6441 6443)
|
||||
(6435 6438)
|
||||
(6087 6088)
|
||||
(6078 6085)
|
||||
(6070 6070)
|
||||
(5940 5940)
|
||||
(5909 5909)
|
||||
(4228 4228)
|
||||
(4182 4183)
|
||||
(4155 4156)
|
||||
(4145 4145)
|
||||
(3967 3967)
|
||||
(3902 3903)
|
||||
(3763 3763)
|
||||
(3635 3635)
|
||||
(3570 3571)
|
||||
(3544 3550)
|
||||
(3536 3537)
|
||||
(3458 3459)
|
||||
(3402 3404)
|
||||
(3398 3400)
|
||||
(3391 3392)
|
||||
(3330 3331)
|
||||
(3315 3315)
|
||||
(3274 3275)
|
||||
(3271 3272)
|
||||
(3267 3268)
|
||||
(3264 3265)
|
||||
(3262 3262)
|
||||
(3202 3203)
|
||||
(3137 3140)
|
||||
(3073 3075)
|
||||
(3018 3020)
|
||||
(3014 3016)
|
||||
(3009 3010)
|
||||
(3007 3007)
|
||||
(2891 2892)
|
||||
(2887 2888)
|
||||
(2880 2880)
|
||||
(2818 2819)
|
||||
(2763 2764)
|
||||
(2761 2761)
|
||||
(2750 2752)
|
||||
(2691 2691)
|
||||
(2622 2624)
|
||||
(2563 2563)
|
||||
(2507 2508)
|
||||
(2503 2504)
|
||||
(2495 2496)
|
||||
(2434 2435)
|
||||
(2382 2383)
|
||||
(2377 2380)
|
||||
(2366 2368)
|
||||
(2363 2363)
|
||||
(2307 2307))
|
||||
(extend
|
||||
(917760 917999)
|
||||
(917536 917631)
|
||||
|
@ -420,34 +568,6 @@
|
|||
(1160 1161)
|
||||
(1155 1159)
|
||||
(768 879))
|
||||
(hangul-syllable-t (55243 55291) (4520 4607))
|
||||
(control
|
||||
(918000 921599)
|
||||
(917632 917759)
|
||||
(917506 917535)
|
||||
(917505 917505)
|
||||
(917504 917504)
|
||||
(119155 119162)
|
||||
(113824 113827)
|
||||
(78896 78911)
|
||||
(65529 65531)
|
||||
(65520 65528)
|
||||
(65279 65279)
|
||||
(8294 8303)
|
||||
(8293 8293)
|
||||
(8288 8292)
|
||||
(8234 8238)
|
||||
(8233 8233)
|
||||
(8232 8232)
|
||||
(8206 8207)
|
||||
(8203 8203)
|
||||
(6158 6158)
|
||||
(1564 1564)
|
||||
(173 173)
|
||||
(127 159)
|
||||
(14 31)
|
||||
(11 12)
|
||||
(0 9))
|
||||
(hangul-syllable-lv
|
||||
(55176 55176)
|
||||
(55148 55148)
|
||||
|
@ -848,174 +968,6 @@
|
|||
(44088 44088)
|
||||
(44060 44060)
|
||||
(44032 44032))
|
||||
(line-feed (10 10))
|
||||
(hangul-syllable-v (55216 55238) (4448 4519))
|
||||
(spacing-mark
|
||||
(119149 119149)
|
||||
(119142 119142)
|
||||
(94192 94193)
|
||||
(94033 94087)
|
||||
(73537 73537)
|
||||
(73534 73535)
|
||||
(73524 73525)
|
||||
(73475 73475)
|
||||
(73461 73462)
|
||||
(73110 73110)
|
||||
(73107 73108)
|
||||
(73098 73102)
|
||||
(72884 72884)
|
||||
(72881 72881)
|
||||
(72873 72873)
|
||||
(72766 72766)
|
||||
(72751 72751)
|
||||
(72343 72343)
|
||||
(72279 72280)
|
||||
(72249 72249)
|
||||
(72164 72164)
|
||||
(72156 72159)
|
||||
(72145 72147)
|
||||
(72002 72002)
|
||||
(72000 72000)
|
||||
(71997 71997)
|
||||
(71991 71992)
|
||||
(71985 71989)
|
||||
(71736 71736)
|
||||
(71724 71726)
|
||||
(71462 71462)
|
||||
(71350 71350)
|
||||
(71342 71343)
|
||||
(71340 71340)
|
||||
(71230 71230)
|
||||
(71227 71228)
|
||||
(71216 71218)
|
||||
(71102 71102)
|
||||
(71096 71099)
|
||||
(71088 71089)
|
||||
(70849 70849)
|
||||
(70846 70846)
|
||||
(70843 70844)
|
||||
(70841 70841)
|
||||
(70833 70834)
|
||||
(70725 70725)
|
||||
(70720 70721)
|
||||
(70709 70711)
|
||||
(70498 70499)
|
||||
(70475 70477)
|
||||
(70471 70472)
|
||||
(70465 70468)
|
||||
(70463 70463)
|
||||
(70402 70403)
|
||||
(70368 70370)
|
||||
(70197 70197)
|
||||
(70194 70195)
|
||||
(70188 70190)
|
||||
(70094 70094)
|
||||
(70079 70080)
|
||||
(70067 70069)
|
||||
(70018 70018)
|
||||
(69957 69958)
|
||||
(69932 69932)
|
||||
(69815 69816)
|
||||
(69808 69810)
|
||||
(69762 69762)
|
||||
(69634 69634)
|
||||
(69632 69632)
|
||||
(44012 44012)
|
||||
(44009 44010)
|
||||
(44006 44007)
|
||||
(44003 44004)
|
||||
(43765 43765)
|
||||
(43758 43759)
|
||||
(43755 43755)
|
||||
(43597 43597)
|
||||
(43571 43572)
|
||||
(43567 43568)
|
||||
(43454 43456)
|
||||
(43450 43451)
|
||||
(43444 43445)
|
||||
(43395 43395)
|
||||
(43346 43347)
|
||||
(43188 43203)
|
||||
(43136 43137)
|
||||
(43047 43047)
|
||||
(43043 43044)
|
||||
(7415 7415)
|
||||
(7393 7393)
|
||||
(7220 7221)
|
||||
(7204 7211)
|
||||
(7154 7155)
|
||||
(7150 7150)
|
||||
(7146 7148)
|
||||
(7143 7143)
|
||||
(7082 7082)
|
||||
(7078 7079)
|
||||
(7073 7073)
|
||||
(7042 7042)
|
||||
(6979 6980)
|
||||
(6973 6977)
|
||||
(6971 6971)
|
||||
(6916 6916)
|
||||
(6765 6770)
|
||||
(6743 6743)
|
||||
(6741 6741)
|
||||
(6681 6682)
|
||||
(6451 6456)
|
||||
(6448 6449)
|
||||
(6441 6443)
|
||||
(6435 6438)
|
||||
(6087 6088)
|
||||
(6078 6085)
|
||||
(6070 6070)
|
||||
(5940 5940)
|
||||
(5909 5909)
|
||||
(4228 4228)
|
||||
(4182 4183)
|
||||
(4155 4156)
|
||||
(4145 4145)
|
||||
(3967 3967)
|
||||
(3902 3903)
|
||||
(3763 3763)
|
||||
(3635 3635)
|
||||
(3570 3571)
|
||||
(3544 3550)
|
||||
(3536 3537)
|
||||
(3458 3459)
|
||||
(3402 3404)
|
||||
(3398 3400)
|
||||
(3391 3392)
|
||||
(3330 3331)
|
||||
(3315 3315)
|
||||
(3274 3275)
|
||||
(3271 3272)
|
||||
(3267 3268)
|
||||
(3264 3265)
|
||||
(3262 3262)
|
||||
(3202 3203)
|
||||
(3137 3140)
|
||||
(3073 3075)
|
||||
(3018 3020)
|
||||
(3014 3016)
|
||||
(3009 3010)
|
||||
(3007 3007)
|
||||
(2891 2892)
|
||||
(2887 2888)
|
||||
(2880 2880)
|
||||
(2818 2819)
|
||||
(2763 2764)
|
||||
(2761 2761)
|
||||
(2750 2752)
|
||||
(2691 2691)
|
||||
(2622 2624)
|
||||
(2563 2563)
|
||||
(2507 2508)
|
||||
(2503 2504)
|
||||
(2495 2496)
|
||||
(2434 2435)
|
||||
(2382 2383)
|
||||
(2377 2380)
|
||||
(2366 2368)
|
||||
(2363 2363)
|
||||
(2307 2307))
|
||||
(hangul-syllable-lvt
|
||||
(55177 55203)
|
||||
(55149 55175)
|
||||
|
@ -1417,7 +1369,55 @@
|
|||
(44061 44087)
|
||||
(44033 44059))
|
||||
(hangul-syllable-l (43360 43388) (4352 4447))
|
||||
(zerowidth-joiner (8205 8205)))))
|
||||
(hangul-syllable-v (55216 55238) (4448 4519))
|
||||
(prepend
|
||||
(73474 73474)
|
||||
(73030 73030)
|
||||
(72324 72329)
|
||||
(72250 72250)
|
||||
(72001 72001)
|
||||
(71999 71999)
|
||||
(70082 70083)
|
||||
(69837 69837)
|
||||
(69821 69821)
|
||||
(3406 3406)
|
||||
(2274 2274)
|
||||
(2192 2193)
|
||||
(1807 1807)
|
||||
(1757 1757)
|
||||
(1536 1541))
|
||||
(regional-indicator (127462 127487))
|
||||
(line-feed (10 10))
|
||||
(control
|
||||
(918000 921599)
|
||||
(917632 917759)
|
||||
(917506 917535)
|
||||
(917505 917505)
|
||||
(917504 917504)
|
||||
(119155 119162)
|
||||
(113824 113827)
|
||||
(78896 78911)
|
||||
(65529 65531)
|
||||
(65520 65528)
|
||||
(65279 65279)
|
||||
(8294 8303)
|
||||
(8293 8293)
|
||||
(8288 8292)
|
||||
(8234 8238)
|
||||
(8233 8233)
|
||||
(8232 8232)
|
||||
(8206 8207)
|
||||
(8203 8203)
|
||||
(6158 6158)
|
||||
(1564 1564)
|
||||
(173 173)
|
||||
(127 159)
|
||||
(14 31)
|
||||
(11 12)
|
||||
(0 9))
|
||||
(zero-width-joiner (8205 8205))
|
||||
(carriage-return (13 13))
|
||||
(hangul-syllable-t (55243 55291) (4520 4607)))))
|
||||
|
||||
(define char-set:grapheme-hangul-syllable-l
|
||||
(char-set))
|
||||
|
@ -1437,7 +1437,7 @@
|
|||
(char-set))
|
||||
(define char-set:grapheme-spacing-mark
|
||||
(char-set))
|
||||
(define char-set:grapheme-zerowidth-joiner
|
||||
(define char-set:grapheme-zero-width-joiner
|
||||
(char-set))
|
||||
|
||||
(define grapheme-charsets
|
||||
|
@ -1459,8 +1459,8 @@
|
|||
char-set:grapheme-regional-indicator)
|
||||
(list 'spacing-mark
|
||||
char-set:grapheme-spacing-mark)
|
||||
(list 'zerowidth-joiner
|
||||
char-set:grapheme-zerowidth-joiner)))
|
||||
(list 'zero-width-joiner
|
||||
char-set:grapheme-zero-width-joiner)))
|
||||
|
||||
(ranges->charset!
|
||||
grapheme-ht
|
||||
|
@ -1508,5 +1508,5 @@
|
|||
char-set:grapheme-spacing-mark)
|
||||
(ranges->charset!
|
||||
grapheme-ht
|
||||
'zerowidth-joiner
|
||||
char-set:grapheme-zerowidth-joiner)
|
||||
'zero-width-joiner
|
||||
char-set:grapheme-zero-width-joiner)
|
||||
|
|
|
@ -1,5 +1,8 @@
|
|||
(define-module (runewidth graphemes stream)
|
||||
(define-module (uniseg graphemes stream)
|
||||
#:use-module (uniseg)
|
||||
#:use-module (ice-9 textual-ports)
|
||||
#:use-module (ice-9 match)
|
||||
#:use-module (srfi srfi-71)
|
||||
#:use-module (srfi srfi-41)
|
||||
#:use-module (srfi srfi-9 gnu)
|
||||
#:export (make-grapheme
|
||||
|
@ -8,20 +11,23 @@
|
|||
grapheme-width
|
||||
grapheme-sentence-end?
|
||||
grapheme-word-end?
|
||||
string->grapheme-stream
|
||||
input->grapheme-stream))
|
||||
|
||||
;; TODO: uniseg also does word and sentence boundaries. These state machines could be implemented if we wanted to.
|
||||
(define-immutable-record-type <grapheme>
|
||||
(make-grapheme glyphs width sentence-end? word-end?)
|
||||
(make-grapheme glyphs width)
|
||||
grapheme?
|
||||
(glyphs grapheme-glyphs)
|
||||
(width grapheme-width)
|
||||
(sentence-end? grapheme-sentence-end?)
|
||||
(word-end? grapheme-word-end?))
|
||||
(width grapheme-width))
|
||||
|
||||
(define (string->grapheme-stream str)
|
||||
"Given a string, create a (lazy) stream of graphemes."
|
||||
(with-input-from-string str
|
||||
(λ () (input->grapheme-stream (current-input-port)))))
|
||||
|
||||
(define-stream (input->grapheme-stream port)
|
||||
(unless port
|
||||
(set! port (current-input-port)))
|
||||
|
||||
"Given an input port, create a (lazy) stream of graphemes."
|
||||
;; The first is what state we are in, and the next is the grapheme
|
||||
;; property of the current character.
|
||||
;; Port of https://github.com/rivo/uniseg/blob/master/graphemerules.go
|
||||
|
@ -29,88 +35,115 @@
|
|||
(match (list cur-state cur-prop)
|
||||
;; Grapheme boundary #3
|
||||
(('carriage-return 'line-feed)
|
||||
(values 'control+line-feed 'no-boundary))
|
||||
(values 'control+line-feed #f))
|
||||
|
||||
;; Grapheme boundary #4
|
||||
((or ('carriage-return _)
|
||||
('control+line-feed _))
|
||||
(values 'any 'boundary))
|
||||
(values 'any #t))
|
||||
|
||||
;; Grapheme boundary #5
|
||||
((_ 'carriage-return)
|
||||
(values 'carriage-return 'boundary))
|
||||
(values 'carriage-return #t))
|
||||
((or (_ 'line-feed)
|
||||
(_ 'control))
|
||||
(values 'control+line-feed 'boundary))
|
||||
(values 'control+line-feed #t))
|
||||
|
||||
;; Grapheme boundary #6
|
||||
(('hangul-syllable-l 'hangul-syllable-l)
|
||||
(values 'hangul-syllable-l 'no-boundary))
|
||||
(values 'hangul-syllable-l #f))
|
||||
((or ('hangul-syllable-l 'hangul-syllable-v)
|
||||
('hangul-syllable-l 'hangul-syllable-lv))
|
||||
(values 'hangul-syllable-lv 'no-boundary))
|
||||
(values 'hangul-syllable-lv #f))
|
||||
(('hangul-syllable-l 'hangul-syllable-lvt)
|
||||
(values 'hangul-syllable-lvt 'no-boundary))
|
||||
(values 'hangul-syllable-lvt #f))
|
||||
((_ 'hangul-syllable-l)
|
||||
(values 'hangul-syllable-l 'boundary))
|
||||
(values 'hangul-syllable-l #t))
|
||||
|
||||
;; Grapheme boundary #7
|
||||
(('hangul-syllable-lv 'hangul-syllable-v)
|
||||
(values 'hangul-syllable-lv 'no-boundary))
|
||||
(values 'hangul-syllable-lv #f))
|
||||
(('hangul-syllable-lv 'hangul-syllable-t)
|
||||
(values 'hangul-syllable-lvt 'no-boundary))
|
||||
(values 'hangul-syllable-lvt #f))
|
||||
((or (_ 'hangul-syllable-lv)
|
||||
(_ 'hangul-syllable-v))
|
||||
(values 'hangul-syllable-lv 'boundary))
|
||||
(values 'hangul-syllable-lv #t))
|
||||
|
||||
;; Grapheme boundary #8
|
||||
((or (_ 'hangul-syllable-lvt)
|
||||
(_ 'hangul-syllable-t))
|
||||
(values 'hangul-syllable-lvt 'boundary))
|
||||
(values 'hangul-syllable-lvt #t))
|
||||
(('hangul-syllable-lvt 'hangul-syllable-t)
|
||||
(values 'hangul-syllable-lvt 'no-boundary))
|
||||
(values 'hangul-syllable-lvt #f))
|
||||
|
||||
;; Grapheme boundary #9
|
||||
((or (_ 'extend)
|
||||
(_ 'zero-width-joiner))
|
||||
(values 'any 'no-boundary))
|
||||
(values 'any #f))
|
||||
|
||||
;; Grapheme boundary #9a
|
||||
((_ 'spacing-mark)
|
||||
(values 'any 'no-boundary))
|
||||
(values 'any #f))
|
||||
|
||||
;; Grapheme boundary #9b
|
||||
(('prepend _)
|
||||
(values 'any 'no-boundary))
|
||||
(values 'any #f))
|
||||
((_ 'prepend)
|
||||
(values 'prepend 'boundary))
|
||||
(values 'prepend #t))
|
||||
|
||||
;; Grapheme boundary #11 (emoji!)
|
||||
(('extended-pictographic 'extend)
|
||||
(values 'extended-pictographic 'no-boundary))
|
||||
(values 'extended-pictographic #f))
|
||||
(('extended-pictographic 'zero-width-joiner)
|
||||
(values 'extended-pictographic+zero-width-joiner 'no-boundary))
|
||||
(values 'extended-pictographic+zero-width-joiner #f))
|
||||
(('extended-pictographic+zero-width-joiner 'extended-pictographic)
|
||||
(values 'extended-pictographic 'no-boundary))
|
||||
(values 'extended-pictographic #f))
|
||||
((_ 'extended-pictographic)
|
||||
(values 'extended-pictographic 'boundary))
|
||||
(values 'extended-pictographic #t))
|
||||
|
||||
|
||||
;; Grapheme boundaries #12 and #13
|
||||
(('regional-indicator-odd 'regional-indicator)
|
||||
(values 'regioinal-indicator-even 'no-boundary))
|
||||
(values 'regioinal-indicator-even #f))
|
||||
(('regional-indicator-even 'regional-indicator)
|
||||
(values 'regional-indicator-odd 'boundary))
|
||||
(values 'regional-indicator-odd #t))
|
||||
((_ 'regional-indicator)
|
||||
(values 'regional-indicator-odd 'boundary))
|
||||
(values 'regional-indicator-odd #t))
|
||||
|
||||
(else (values 'other 'other))))
|
||||
|
||||
(define (transition-state state char)
|
||||
"Given the current state and the next char, run a state transition"
|
||||
;; Everything else considered a boundeary
|
||||
(else (values 'any #t))))
|
||||
|
||||
|
||||
)
|
||||
(define grapheme-width 0)
|
||||
(define glyphs-reverse '())
|
||||
|
||||
(define grapheme (make-grapheme glyphs width sentence-end? word-end?))
|
||||
(stream-cons grapheme (input->grapheme-stream port)))
|
||||
(define hit-eof #f)
|
||||
|
||||
(define (iterate-through-grapheme state)
|
||||
(define glyph (get-char port))
|
||||
(if
|
||||
(eof-object? glyph)
|
||||
(begin
|
||||
(set! hit-eof #t)
|
||||
state)
|
||||
(begin
|
||||
(set! glyphs-reverse (cons glyph glyphs-reverse))
|
||||
(let* ((width property (char-width glyph))
|
||||
(next-state boundary? (state-machine state property)))
|
||||
|
||||
(set! grapheme-width (+ grapheme-width width))
|
||||
|
||||
(if boundary?
|
||||
state
|
||||
(iterate-through-grapheme state))))))
|
||||
|
||||
(define final-state (iterate-through-grapheme 'any))
|
||||
|
||||
(if
|
||||
hit-eof
|
||||
stream-null
|
||||
(stream-cons
|
||||
(make-grapheme
|
||||
(reverse glyphs-reverse)
|
||||
grapheme-width)
|
||||
(input->grapheme-stream port))))
|
||||
|
|
|
@ -21,6 +21,9 @@
|
|||
file-to-lines
|
||||
ranges->charset!))
|
||||
|
||||
;;
|
||||
;; Common PEG patterns
|
||||
;;
|
||||
(define-peg-pattern @hex body (peg "[a-fA-F0-9]"))
|
||||
|
||||
(define-peg-pattern @codepoint all
|
||||
|
@ -31,13 +34,14 @@
|
|||
(and @codepoint (ignore "..") @codepoint)
|
||||
@codepoint))
|
||||
|
||||
|
||||
(define-peg-pattern @comment all
|
||||
(and (ignore "#") (* peg-any)))
|
||||
|
||||
(define-peg-pattern @ws none
|
||||
(or " " "\t"))
|
||||
|
||||
;; Helper macro to add a list of character ranges
|
||||
;; to a hash-set.
|
||||
(define-syntax-rule (cons-hash-list! ht key low high)
|
||||
(let* ((old (hashq-ref ht key))
|
||||
(value (list low high))
|
||||
|
@ -47,6 +51,8 @@
|
|||
(list value))))
|
||||
(hashq-set! ht key new-lst)))
|
||||
|
||||
;; Helper macro to take above character range hash table
|
||||
;; and insert it into Guile's chasracter set system.
|
||||
(define-syntax-rule (ranges->charset! ht name symbol)
|
||||
(let* ((pairs (hashq-ref ht name)))
|
||||
(for-each
|
||||
|
@ -64,15 +70,17 @@
|
|||
;; So they can't be written out as such.
|
||||
(locale-string->integer str 16))
|
||||
|
||||
;; This should be in the standard library...
|
||||
(define (format-exception-msg port err)
|
||||
(apply format port (exception-message err) (exception-irritants err))
|
||||
(display "\n" port))
|
||||
|
||||
;; Little checker to see if we are in the danger zone
|
||||
;; (surrogate range is invalid unicode and guile errors)
|
||||
(define (in-surrogate-range num)
|
||||
(and (>= num #xd800)
|
||||
(<= num #xdfff)))
|
||||
|
||||
|
||||
(define* (wget-to-lines url #:optional (port #f))
|
||||
(format port "Downloading from ~a..." url)
|
||||
|
||||
|
|
Loading…
Reference in New Issue