Significant refactors and cleanups, and add grapheme parse
This commit is contained in:
parent
8ea25ea23d
commit
4a93c70b79
9 changed files with 2067 additions and 457 deletions
4
guix.scm
4
guix.scm
|
@ -80,7 +80,9 @@
|
|||
"GUILE_LOAD_COMPILED_PATH"
|
||||
(compiled-dir out version)
|
||||
(compiled-dir "" version))))
|
||||
,''("generate-east-asian" "generate-emoji"))
|
||||
,''("generate-east-asian"
|
||||
"generate-emoji"
|
||||
"generate-graphemes"))
|
||||
#t))))))))
|
||||
(native-inputs
|
||||
(list autoconf automake pkg-config texinfo))
|
||||
|
|
4
hall.scm
4
hall.scm
|
@ -25,6 +25,7 @@
|
|||
((scheme-file "emoji")
|
||||
(directory "eastasian" ((scheme-file "locale")))
|
||||
(scheme-file "eastasian")
|
||||
(scheme-file "graphemes")
|
||||
(scheme-file "internal")))))
|
||||
(tests ((directory
|
||||
"tests"
|
||||
|
@ -33,7 +34,8 @@
|
|||
((directory
|
||||
"scripts"
|
||||
((in-file "generate-east-asian")
|
||||
(in-file "generate-emoji")))))
|
||||
(in-file "generate-emoji")
|
||||
(in-file "generate-graphemes")))))
|
||||
(documentation
|
||||
((org-file "README")
|
||||
(symlink "README" "README.org")
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
;; Code generated by script/generate. DO NOT EDIT
|
||||
;; Code generated by scripts/generate-east-asian. DO NOT EDIT
|
||||
|
||||
(define-module
|
||||
(runewidth eastasian)
|
||||
|
@ -6,6 +6,8 @@
|
|||
(ice-9 hash-table)
|
||||
#:use-module
|
||||
(srfi srfi-1)
|
||||
#:use-module
|
||||
(runewidth internal)
|
||||
#:export
|
||||
(char-set:eastasian-combining
|
||||
char-set:eastasian-doublewidth
|
||||
|
@ -13,285 +15,61 @@
|
|||
char-set:eastasian-narrow
|
||||
char-set:eastasian-neutral
|
||||
char-set:eastasian-ambiguous))
|
||||
(define chars-ht
|
||||
(define eastasian-ht
|
||||
(alist->hashq-table
|
||||
'((doublewidth
|
||||
(201547 262141)
|
||||
(196608 201546)
|
||||
(195104 196605)
|
||||
(195102 195103)
|
||||
(194560 195101)
|
||||
(191457 194559)
|
||||
(183984 191456)
|
||||
(183970 183983)
|
||||
(178208 183969)
|
||||
(178206 178207)
|
||||
(177984 178205)
|
||||
(177973 177983)
|
||||
(173824 177972)
|
||||
(173790 173823)
|
||||
(131072 173789)
|
||||
(129744 129750)
|
||||
(129728 129730)
|
||||
(129712 129718)
|
||||
(129680 129704)
|
||||
(129664 129670)
|
||||
(129656 129658)
|
||||
(129648 129652)
|
||||
(129485 129535)
|
||||
(129402 129483)
|
||||
(129351 129400)
|
||||
(129340 129349)
|
||||
(129292 129338)
|
||||
(128992 129003)
|
||||
(128756 128764)
|
||||
(128747 128748)
|
||||
(128725 128727)
|
||||
(128720 128722)
|
||||
(128716 128716)
|
||||
(128640 128709)
|
||||
(128512 128591)
|
||||
(128507 128511)
|
||||
(128420 128420)
|
||||
(128405 128406)
|
||||
(128378 128378)
|
||||
(128336 128359)
|
||||
(128331 128334)
|
||||
(128255 128317)
|
||||
(128066 128252)
|
||||
(128064 128064)
|
||||
(128000 128062)
|
||||
(127995 127999)
|
||||
(127992 127994)
|
||||
(127988 127988)
|
||||
(127968 127984)
|
||||
(127951 127955)
|
||||
(127904 127946)
|
||||
(127870 127891)
|
||||
(127799 127868)
|
||||
(127789 127797)
|
||||
(127744 127776)
|
||||
(127584 127589)
|
||||
(127568 127569)
|
||||
(127552 127560)
|
||||
(127504 127547)
|
||||
(127488 127490)
|
||||
(127377 127386)
|
||||
(127374 127374)
|
||||
(127183 127183)
|
||||
(126980 126980)
|
||||
(110960 111355)
|
||||
(110948 110951)
|
||||
(110928 110930)
|
||||
(110848 110878)
|
||||
(110592 110847)
|
||||
(101632 101640)
|
||||
(101120 101589)
|
||||
(100352 101119)
|
||||
(94208 100343)
|
||||
(94192 94193)
|
||||
(94180 94180)
|
||||
(94179 94179)
|
||||
(94178 94178)
|
||||
(94176 94177)
|
||||
(65509 65510)
|
||||
(65508 65508)
|
||||
(65507 65507)
|
||||
(65506 65506)
|
||||
(65504 65505)
|
||||
(65376 65376)
|
||||
(65375 65375)
|
||||
(65374 65374)
|
||||
(65373 65373)
|
||||
(65372 65372)
|
||||
(65371 65371)
|
||||
(65345 65370)
|
||||
(65344 65344)
|
||||
(65343 65343)
|
||||
(65342 65342)
|
||||
(65341 65341)
|
||||
(65340 65340)
|
||||
(65339 65339)
|
||||
(65313 65338)
|
||||
(65311 65312)
|
||||
(65308 65310)
|
||||
(65306 65307)
|
||||
(65296 65305)
|
||||
(65294 65295)
|
||||
(65293 65293)
|
||||
(65292 65292)
|
||||
(65291 65291)
|
||||
(65290 65290)
|
||||
(65289 65289)
|
||||
(65288 65288)
|
||||
(65285 65287)
|
||||
(65284 65284)
|
||||
(65281 65283)
|
||||
(65130 65131)
|
||||
(65129 65129)
|
||||
(65128 65128)
|
||||
(65124 65126)
|
||||
(65123 65123)
|
||||
(65122 65122)
|
||||
(65119 65121)
|
||||
(65118 65118)
|
||||
(65117 65117)
|
||||
(65116 65116)
|
||||
(65115 65115)
|
||||
(65114 65114)
|
||||
(65113 65113)
|
||||
(65112 65112)
|
||||
(65108 65111)
|
||||
(65104 65106)
|
||||
(65101 65103)
|
||||
(65097 65100)
|
||||
(65096 65096)
|
||||
(65095 65095)
|
||||
(65093 65094)
|
||||
(65092 65092)
|
||||
(65091 65091)
|
||||
(65090 65090)
|
||||
(65089 65089)
|
||||
(65088 65088)
|
||||
(65087 65087)
|
||||
(65086 65086)
|
||||
(65085 65085)
|
||||
(65084 65084)
|
||||
(65083 65083)
|
||||
(65082 65082)
|
||||
(65081 65081)
|
||||
(65080 65080)
|
||||
(65079 65079)
|
||||
(65078 65078)
|
||||
(65077 65077)
|
||||
(65075 65076)
|
||||
(65073 65074)
|
||||
(65072 65072)
|
||||
(65049 65049)
|
||||
(65048 65048)
|
||||
(65047 65047)
|
||||
(65040 65046)
|
||||
(64218 64255)
|
||||
(64112 64217)
|
||||
(64110 64111)
|
||||
(63744 64109)
|
||||
(44032 55203)
|
||||
(43360 43388)
|
||||
(42128 42182)
|
||||
(40982 42124)
|
||||
(40981 40981)
|
||||
(40960 40980)
|
||||
(40957 40959)
|
||||
(19968 40956)
|
||||
(13312 19903)
|
||||
(13056 13311)
|
||||
(12992 13055)
|
||||
(12977 12991)
|
||||
(12938 12976)
|
||||
(12928 12937)
|
||||
(12896 12927)
|
||||
(12881 12895)
|
||||
(12880 12880)
|
||||
(12842 12871)
|
||||
(12832 12841)
|
||||
(12800 12830)
|
||||
(12784 12799)
|
||||
(12736 12771)
|
||||
(12704 12735)
|
||||
(12694 12703)
|
||||
(12690 12693)
|
||||
(12688 12689)
|
||||
(12593 12686)
|
||||
(12549 12591)
|
||||
(12543 12543)
|
||||
(12540 12542)
|
||||
(12539 12539)
|
||||
(12449 12538)
|
||||
(12448 12448)
|
||||
(12447 12447)
|
||||
(12445 12446)
|
||||
(12443 12444)
|
||||
(12353 12438)
|
||||
(12350 12350)
|
||||
(12349 12349)
|
||||
(12348 12348)
|
||||
(12347 12347)
|
||||
(12344 12346)
|
||||
(12342 12343)
|
||||
(12337 12341)
|
||||
(12336 12336)
|
||||
(12334 12335)
|
||||
(12330 12333)
|
||||
(12321 12329)
|
||||
(12320 12320)
|
||||
(12318 12319)
|
||||
(12317 12317)
|
||||
(12316 12316)
|
||||
(12315 12315)
|
||||
(12314 12314)
|
||||
(12313 12313)
|
||||
(12312 12312)
|
||||
(12311 12311)
|
||||
(12310 12310)
|
||||
(12309 12309)
|
||||
(12308 12308)
|
||||
(12306 12307)
|
||||
(12305 12305)
|
||||
(12304 12304)
|
||||
(12303 12303)
|
||||
(12302 12302)
|
||||
(12301 12301)
|
||||
(12300 12300)
|
||||
(12299 12299)
|
||||
(12298 12298)
|
||||
(12297 12297)
|
||||
(12296 12296)
|
||||
(12295 12295)
|
||||
(12294 12294)
|
||||
(12293 12293)
|
||||
(12292 12292)
|
||||
(12289 12291)
|
||||
(12288 12288)
|
||||
(12272 12283)
|
||||
(12032 12245)
|
||||
(11931 12019)
|
||||
(11904 11929)
|
||||
(11093 11093)
|
||||
(11088 11088)
|
||||
(11035 11036)
|
||||
(10175 10175)
|
||||
(10160 10160)
|
||||
(10133 10135)
|
||||
(10071 10071)
|
||||
(10067 10069)
|
||||
(10062 10062)
|
||||
(10060 10060)
|
||||
(10024 10024)
|
||||
(9994 9995)
|
||||
(9989 9989)
|
||||
(9981 9981)
|
||||
(9978 9978)
|
||||
(9973 9973)
|
||||
(9970 9971)
|
||||
(9962 9962)
|
||||
(9940 9940)
|
||||
(9934 9934)
|
||||
(9924 9925)
|
||||
(9917 9918)
|
||||
(9898 9899)
|
||||
(9889 9889)
|
||||
(9875 9875)
|
||||
(9855 9855)
|
||||
(9800 9811)
|
||||
(9748 9749)
|
||||
(9725 9726)
|
||||
(9203 9203)
|
||||
(9200 9200)
|
||||
(9193 9196)
|
||||
(9002 9002)
|
||||
(9001 9001)
|
||||
(8986 8987)
|
||||
(4352 4447))
|
||||
'((combining
|
||||
(125136 125142)
|
||||
(122918 122922)
|
||||
(122915 122916)
|
||||
(122907 122913)
|
||||
(122888 122904)
|
||||
(122880 122886)
|
||||
(119362 119364)
|
||||
(119210 119213)
|
||||
(119173 119179)
|
||||
(119163 119170)
|
||||
(119149 119154)
|
||||
(119143 119145)
|
||||
(119141 119142)
|
||||
(92912 92916)
|
||||
(70512 70516)
|
||||
(70502 70508)
|
||||
(70459 70460)
|
||||
(70400 70401)
|
||||
(69446 69456)
|
||||
(69291 69292)
|
||||
(66422 66426)
|
||||
(66045 66045)
|
||||
(65056 65071)
|
||||
(43232 43249)
|
||||
(42736 42737)
|
||||
(42654 42655)
|
||||
(42612 42621)
|
||||
(42608 42610)
|
||||
(42607 42607)
|
||||
(12441 12442)
|
||||
(11744 11775)
|
||||
(11503 11505)
|
||||
(8421 8432)
|
||||
(8418 8420)
|
||||
(8417 8417)
|
||||
(8413 8416)
|
||||
(8400 8412)
|
||||
(7675 7679)
|
||||
(7616 7673)
|
||||
(7019 7027)
|
||||
(6847 6848)
|
||||
(6846 6846)
|
||||
(6832 6845)
|
||||
(6783 6783)
|
||||
(4957 4959)
|
||||
(3328 3329)
|
||||
(3076 3076)
|
||||
(3072 3072)
|
||||
(2027 2035)
|
||||
(1160 1161)
|
||||
(1155 1159)
|
||||
(768 879))
|
||||
(ambiguous
|
||||
(1048576 1114109)
|
||||
(983040 1048573)
|
||||
|
@ -490,59 +268,67 @@
|
|||
(167 167)
|
||||
(164 164)
|
||||
(161 161))
|
||||
(combining
|
||||
(125136 125142)
|
||||
(122918 122922)
|
||||
(122915 122916)
|
||||
(122907 122913)
|
||||
(122888 122904)
|
||||
(122880 122886)
|
||||
(119362 119364)
|
||||
(119210 119213)
|
||||
(119173 119179)
|
||||
(119163 119170)
|
||||
(119149 119154)
|
||||
(119143 119145)
|
||||
(119141 119142)
|
||||
(92912 92916)
|
||||
(70512 70516)
|
||||
(70502 70508)
|
||||
(70459 70460)
|
||||
(70400 70401)
|
||||
(69446 69456)
|
||||
(69291 69292)
|
||||
(66422 66426)
|
||||
(66045 66045)
|
||||
(65056 65071)
|
||||
(43232 43249)
|
||||
(42736 42737)
|
||||
(42654 42655)
|
||||
(42612 42621)
|
||||
(42608 42610)
|
||||
(42607 42607)
|
||||
(12441 12442)
|
||||
(11744 11775)
|
||||
(11503 11505)
|
||||
(8421 8432)
|
||||
(8418 8420)
|
||||
(8417 8417)
|
||||
(8413 8416)
|
||||
(8400 8412)
|
||||
(7675 7679)
|
||||
(7616 7673)
|
||||
(7019 7027)
|
||||
(6847 6848)
|
||||
(6846 6846)
|
||||
(6832 6845)
|
||||
(6783 6783)
|
||||
(4957 4959)
|
||||
(3328 3329)
|
||||
(3076 3076)
|
||||
(3072 3072)
|
||||
(2027 2035)
|
||||
(1160 1161)
|
||||
(1155 1159)
|
||||
(768 879))
|
||||
(halfwidth
|
||||
(65517 65518)
|
||||
(65513 65516)
|
||||
(65512 65512)
|
||||
(65498 65500)
|
||||
(65490 65495)
|
||||
(65482 65487)
|
||||
(65474 65479)
|
||||
(65440 65470)
|
||||
(65438 65439)
|
||||
(65393 65437)
|
||||
(65392 65392)
|
||||
(65382 65391)
|
||||
(65380 65381)
|
||||
(65379 65379)
|
||||
(65378 65378)
|
||||
(65377 65377)
|
||||
(8361 8361))
|
||||
(narrow
|
||||
(10630 10630)
|
||||
(10629 10629)
|
||||
(10221 10221)
|
||||
(10220 10220)
|
||||
(10219 10219)
|
||||
(10218 10218)
|
||||
(10217 10217)
|
||||
(10216 10216)
|
||||
(10215 10215)
|
||||
(10214 10214)
|
||||
(175 175)
|
||||
(172 172)
|
||||
(166 166)
|
||||
(165 165)
|
||||
(162 163)
|
||||
(126 126)
|
||||
(125 125)
|
||||
(124 124)
|
||||
(123 123)
|
||||
(97 122)
|
||||
(96 96)
|
||||
(95 95)
|
||||
(94 94)
|
||||
(93 93)
|
||||
(92 92)
|
||||
(91 91)
|
||||
(65 90)
|
||||
(63 64)
|
||||
(60 62)
|
||||
(58 59)
|
||||
(48 57)
|
||||
(46 47)
|
||||
(45 45)
|
||||
(44 44)
|
||||
(43 43)
|
||||
(42 42)
|
||||
(41 41)
|
||||
(40 40)
|
||||
(37 39)
|
||||
(36 36)
|
||||
(33 35)
|
||||
(32 32))
|
||||
(neutral
|
||||
(917536 917631)
|
||||
(917505 917505)
|
||||
|
@ -2435,79 +2221,283 @@
|
|||
(128 159)
|
||||
(127 127)
|
||||
(0 31))
|
||||
(narrow
|
||||
(10630 10630)
|
||||
(10629 10629)
|
||||
(10221 10221)
|
||||
(10220 10220)
|
||||
(10219 10219)
|
||||
(10218 10218)
|
||||
(10217 10217)
|
||||
(10216 10216)
|
||||
(10215 10215)
|
||||
(10214 10214)
|
||||
(175 175)
|
||||
(172 172)
|
||||
(166 166)
|
||||
(165 165)
|
||||
(162 163)
|
||||
(126 126)
|
||||
(125 125)
|
||||
(124 124)
|
||||
(123 123)
|
||||
(97 122)
|
||||
(96 96)
|
||||
(95 95)
|
||||
(94 94)
|
||||
(93 93)
|
||||
(92 92)
|
||||
(91 91)
|
||||
(65 90)
|
||||
(63 64)
|
||||
(60 62)
|
||||
(58 59)
|
||||
(48 57)
|
||||
(46 47)
|
||||
(45 45)
|
||||
(44 44)
|
||||
(43 43)
|
||||
(42 42)
|
||||
(41 41)
|
||||
(40 40)
|
||||
(37 39)
|
||||
(36 36)
|
||||
(33 35)
|
||||
(32 32))
|
||||
(halfwidth
|
||||
(65517 65518)
|
||||
(65513 65516)
|
||||
(65512 65512)
|
||||
(65498 65500)
|
||||
(65490 65495)
|
||||
(65482 65487)
|
||||
(65474 65479)
|
||||
(65440 65470)
|
||||
(65438 65439)
|
||||
(65393 65437)
|
||||
(65392 65392)
|
||||
(65382 65391)
|
||||
(65380 65381)
|
||||
(65379 65379)
|
||||
(65378 65378)
|
||||
(65377 65377)
|
||||
(8361 8361)))))
|
||||
|
||||
(define-syntax-rule
|
||||
(ranges->charset! name symbol)
|
||||
(let* ((pairs (hashq-ref chars-ht name)))
|
||||
(for-each
|
||||
(λ (pair)
|
||||
(ucs-range->char-set!
|
||||
(first pair)
|
||||
(+ (second pair) 1)
|
||||
#t
|
||||
symbol))
|
||||
pairs)))
|
||||
(doublewidth
|
||||
(201547 262141)
|
||||
(196608 201546)
|
||||
(195104 196605)
|
||||
(195102 195103)
|
||||
(194560 195101)
|
||||
(191457 194559)
|
||||
(183984 191456)
|
||||
(183970 183983)
|
||||
(178208 183969)
|
||||
(178206 178207)
|
||||
(177984 178205)
|
||||
(177973 177983)
|
||||
(173824 177972)
|
||||
(173790 173823)
|
||||
(131072 173789)
|
||||
(129744 129750)
|
||||
(129728 129730)
|
||||
(129712 129718)
|
||||
(129680 129704)
|
||||
(129664 129670)
|
||||
(129656 129658)
|
||||
(129648 129652)
|
||||
(129485 129535)
|
||||
(129402 129483)
|
||||
(129351 129400)
|
||||
(129340 129349)
|
||||
(129292 129338)
|
||||
(128992 129003)
|
||||
(128756 128764)
|
||||
(128747 128748)
|
||||
(128725 128727)
|
||||
(128720 128722)
|
||||
(128716 128716)
|
||||
(128640 128709)
|
||||
(128512 128591)
|
||||
(128507 128511)
|
||||
(128420 128420)
|
||||
(128405 128406)
|
||||
(128378 128378)
|
||||
(128336 128359)
|
||||
(128331 128334)
|
||||
(128255 128317)
|
||||
(128066 128252)
|
||||
(128064 128064)
|
||||
(128000 128062)
|
||||
(127995 127999)
|
||||
(127992 127994)
|
||||
(127988 127988)
|
||||
(127968 127984)
|
||||
(127951 127955)
|
||||
(127904 127946)
|
||||
(127870 127891)
|
||||
(127799 127868)
|
||||
(127789 127797)
|
||||
(127744 127776)
|
||||
(127584 127589)
|
||||
(127568 127569)
|
||||
(127552 127560)
|
||||
(127504 127547)
|
||||
(127488 127490)
|
||||
(127377 127386)
|
||||
(127374 127374)
|
||||
(127183 127183)
|
||||
(126980 126980)
|
||||
(110960 111355)
|
||||
(110948 110951)
|
||||
(110928 110930)
|
||||
(110848 110878)
|
||||
(110592 110847)
|
||||
(101632 101640)
|
||||
(101120 101589)
|
||||
(100352 101119)
|
||||
(94208 100343)
|
||||
(94192 94193)
|
||||
(94180 94180)
|
||||
(94179 94179)
|
||||
(94178 94178)
|
||||
(94176 94177)
|
||||
(65509 65510)
|
||||
(65508 65508)
|
||||
(65507 65507)
|
||||
(65506 65506)
|
||||
(65504 65505)
|
||||
(65376 65376)
|
||||
(65375 65375)
|
||||
(65374 65374)
|
||||
(65373 65373)
|
||||
(65372 65372)
|
||||
(65371 65371)
|
||||
(65345 65370)
|
||||
(65344 65344)
|
||||
(65343 65343)
|
||||
(65342 65342)
|
||||
(65341 65341)
|
||||
(65340 65340)
|
||||
(65339 65339)
|
||||
(65313 65338)
|
||||
(65311 65312)
|
||||
(65308 65310)
|
||||
(65306 65307)
|
||||
(65296 65305)
|
||||
(65294 65295)
|
||||
(65293 65293)
|
||||
(65292 65292)
|
||||
(65291 65291)
|
||||
(65290 65290)
|
||||
(65289 65289)
|
||||
(65288 65288)
|
||||
(65285 65287)
|
||||
(65284 65284)
|
||||
(65281 65283)
|
||||
(65130 65131)
|
||||
(65129 65129)
|
||||
(65128 65128)
|
||||
(65124 65126)
|
||||
(65123 65123)
|
||||
(65122 65122)
|
||||
(65119 65121)
|
||||
(65118 65118)
|
||||
(65117 65117)
|
||||
(65116 65116)
|
||||
(65115 65115)
|
||||
(65114 65114)
|
||||
(65113 65113)
|
||||
(65112 65112)
|
||||
(65108 65111)
|
||||
(65104 65106)
|
||||
(65101 65103)
|
||||
(65097 65100)
|
||||
(65096 65096)
|
||||
(65095 65095)
|
||||
(65093 65094)
|
||||
(65092 65092)
|
||||
(65091 65091)
|
||||
(65090 65090)
|
||||
(65089 65089)
|
||||
(65088 65088)
|
||||
(65087 65087)
|
||||
(65086 65086)
|
||||
(65085 65085)
|
||||
(65084 65084)
|
||||
(65083 65083)
|
||||
(65082 65082)
|
||||
(65081 65081)
|
||||
(65080 65080)
|
||||
(65079 65079)
|
||||
(65078 65078)
|
||||
(65077 65077)
|
||||
(65075 65076)
|
||||
(65073 65074)
|
||||
(65072 65072)
|
||||
(65049 65049)
|
||||
(65048 65048)
|
||||
(65047 65047)
|
||||
(65040 65046)
|
||||
(64218 64255)
|
||||
(64112 64217)
|
||||
(64110 64111)
|
||||
(63744 64109)
|
||||
(44032 55203)
|
||||
(43360 43388)
|
||||
(42128 42182)
|
||||
(40982 42124)
|
||||
(40981 40981)
|
||||
(40960 40980)
|
||||
(40957 40959)
|
||||
(19968 40956)
|
||||
(13312 19903)
|
||||
(13056 13311)
|
||||
(12992 13055)
|
||||
(12977 12991)
|
||||
(12938 12976)
|
||||
(12928 12937)
|
||||
(12896 12927)
|
||||
(12881 12895)
|
||||
(12880 12880)
|
||||
(12842 12871)
|
||||
(12832 12841)
|
||||
(12800 12830)
|
||||
(12784 12799)
|
||||
(12736 12771)
|
||||
(12704 12735)
|
||||
(12694 12703)
|
||||
(12690 12693)
|
||||
(12688 12689)
|
||||
(12593 12686)
|
||||
(12549 12591)
|
||||
(12543 12543)
|
||||
(12540 12542)
|
||||
(12539 12539)
|
||||
(12449 12538)
|
||||
(12448 12448)
|
||||
(12447 12447)
|
||||
(12445 12446)
|
||||
(12443 12444)
|
||||
(12353 12438)
|
||||
(12350 12350)
|
||||
(12349 12349)
|
||||
(12348 12348)
|
||||
(12347 12347)
|
||||
(12344 12346)
|
||||
(12342 12343)
|
||||
(12337 12341)
|
||||
(12336 12336)
|
||||
(12334 12335)
|
||||
(12330 12333)
|
||||
(12321 12329)
|
||||
(12320 12320)
|
||||
(12318 12319)
|
||||
(12317 12317)
|
||||
(12316 12316)
|
||||
(12315 12315)
|
||||
(12314 12314)
|
||||
(12313 12313)
|
||||
(12312 12312)
|
||||
(12311 12311)
|
||||
(12310 12310)
|
||||
(12309 12309)
|
||||
(12308 12308)
|
||||
(12306 12307)
|
||||
(12305 12305)
|
||||
(12304 12304)
|
||||
(12303 12303)
|
||||
(12302 12302)
|
||||
(12301 12301)
|
||||
(12300 12300)
|
||||
(12299 12299)
|
||||
(12298 12298)
|
||||
(12297 12297)
|
||||
(12296 12296)
|
||||
(12295 12295)
|
||||
(12294 12294)
|
||||
(12293 12293)
|
||||
(12292 12292)
|
||||
(12289 12291)
|
||||
(12288 12288)
|
||||
(12272 12283)
|
||||
(12032 12245)
|
||||
(11931 12019)
|
||||
(11904 11929)
|
||||
(11093 11093)
|
||||
(11088 11088)
|
||||
(11035 11036)
|
||||
(10175 10175)
|
||||
(10160 10160)
|
||||
(10133 10135)
|
||||
(10071 10071)
|
||||
(10067 10069)
|
||||
(10062 10062)
|
||||
(10060 10060)
|
||||
(10024 10024)
|
||||
(9994 9995)
|
||||
(9989 9989)
|
||||
(9981 9981)
|
||||
(9978 9978)
|
||||
(9973 9973)
|
||||
(9970 9971)
|
||||
(9962 9962)
|
||||
(9940 9940)
|
||||
(9934 9934)
|
||||
(9924 9925)
|
||||
(9917 9918)
|
||||
(9898 9899)
|
||||
(9889 9889)
|
||||
(9875 9875)
|
||||
(9855 9855)
|
||||
(9800 9811)
|
||||
(9748 9749)
|
||||
(9725 9726)
|
||||
(9203 9203)
|
||||
(9200 9200)
|
||||
(9193 9196)
|
||||
(9002 9002)
|
||||
(9001 9001)
|
||||
(8986 8987)
|
||||
(4352 4447)))))
|
||||
|
||||
(define char-set:eastasian-combining (char-set))
|
||||
(define char-set:eastasian-doublewidth
|
||||
|
@ -2518,20 +2508,26 @@
|
|||
(define char-set:eastasian-ambiguous (char-set))
|
||||
|
||||
(ranges->charset!
|
||||
eastasian-ht
|
||||
'combining
|
||||
char-set:eastasian-combining)
|
||||
(ranges->charset!
|
||||
eastasian-ht
|
||||
'doublewidth
|
||||
char-set:eastasian-doublewidth)
|
||||
(ranges->charset!
|
||||
eastasian-ht
|
||||
'halfwidth
|
||||
char-set:eastasian-halfwidth)
|
||||
(ranges->charset!
|
||||
eastasian-ht
|
||||
'narrow
|
||||
char-set:eastasian-narrow)
|
||||
(ranges->charset!
|
||||
eastasian-ht
|
||||
'neutral
|
||||
char-set:eastasian-neutral)
|
||||
(ranges->charset!
|
||||
eastasian-ht
|
||||
'ambiguous
|
||||
char-set:eastasian-ambiguous)
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
;; Code generated by script/generate. DO NOT EDIT
|
||||
;; Code generated by scripts/generate-emoji. DO NOT EDIT
|
||||
|
||||
(define-module
|
||||
(runewidth emoji)
|
||||
|
|
1468
runewidth/graphemes.scm
Normal file
1468
runewidth/graphemes.scm
Normal file
File diff suppressed because it is too large
Load diff
|
@ -6,17 +6,20 @@
|
|||
#:use-module (web uri)
|
||||
#:use-module (web client)
|
||||
#:use-module (web request)
|
||||
#:use-module (srfi srfi-1)
|
||||
#:use-module (srfi srfi-71)
|
||||
#:export (@hex
|
||||
@codepoint
|
||||
@codepoint-range
|
||||
@comment
|
||||
@ws
|
||||
cons-hash-list!
|
||||
hex-string->integer
|
||||
format-exception-msg
|
||||
in-surrogate-range
|
||||
wget-to-lines
|
||||
file-to-lines))
|
||||
file-to-lines
|
||||
ranges->charset!))
|
||||
|
||||
(define-peg-pattern @hex body (peg "[a-fA-F0-9]"))
|
||||
|
||||
|
@ -35,6 +38,26 @@
|
|||
(define-peg-pattern @ws none
|
||||
(or " " "\t"))
|
||||
|
||||
;; Prepend the two-element list (LOW HIGH) to the list stored under KEY in
;; the hashq table HT.  When KEY has no entry yet, the stored list starts out
;; as the single element (LOW HIGH).  Evaluates to whatever `hashq-set!'
;; returns.
(define-syntax-rule (cons-hash-list! ht key low high)
  (let* ((existing (hashq-ref ht key))
         (range (list low high)))
    (hashq-set! ht key
                (cons range (or existing '())))))
|
||||
|
||||
;; Add every range stored under NAME in the hashq table HT to the char-set
;; SYMBOL.  Each stored range is a two-element list (LOW HIGH).
;;
;; A NAME with no entry in HT is treated as having no ranges, so SYMBOL is
;; left untouched instead of `for-each' being handed #f.
;;
;; The result of `ucs-range->char-set!' is discarded, so this relies on that
;; procedure updating SYMBOL in place.
(define-syntax-rule (ranges->charset! ht name symbol)
  (let ((pairs (hashq-ref ht name '())))
    (for-each
     (λ (pair)
       (ucs-range->char-set!
        (first pair)
        ;; Exclusive upper range, so add one
        (+ (second pair) 1)
        #t symbol))
     pairs)))
|
||||
|
||||
(define (hex-string->integer str)
|
||||
;; XXX: We would ideally do integer->char here and save it to file as such
|
||||
;; However read-expr* does not actually work for all the characters!
|
||||
|
|
|
@ -28,17 +28,18 @@
|
|||
(define-peg-pattern @ea-line body
|
||||
(and @ea-datum (* @ws) @comment))
|
||||
|
||||
(define ea-chars-ht (make-hash-table 6))
|
||||
(define eastasian-ht (make-hash-table 6))
|
||||
|
||||
(define (process-east-asian-line line)
|
||||
(define (cons-ht! key low high)
|
||||
(let* ((old (hashq-ref ea-chars-ht key))
|
||||
(value (list low high))
|
||||
(new-lst
|
||||
(if old
|
||||
(cons value old)
|
||||
(list value))))
|
||||
(hashq-set! ea-chars-ht key new-lst)))
|
||||
(define (string->property str comment)
|
||||
(if (string-contains comment "COMBINING")
|
||||
'combining
|
||||
(match str
|
||||
((or "W" "F") 'doublewidth)
|
||||
("H" 'halfwidth)
|
||||
("Na" 'narrow)
|
||||
("N" 'neutral)
|
||||
("A" 'ambiguous))))
|
||||
|
||||
(define tree (peg:tree (match-pattern @ea-line line)))
|
||||
|
||||
|
@ -49,7 +50,7 @@
|
|||
(match tree
|
||||
(((('@codepoint-range
|
||||
('@codepoint codepoints) ...)
|
||||
('@ea-width-prop width-prop))
|
||||
('@ea-width-prop prop-str))
|
||||
('@comment comment))
|
||||
|
||||
(with-exception-handler
|
||||
|
@ -58,25 +59,14 @@
|
|||
(format-exception-msg stdout err))
|
||||
(λ ()
|
||||
(let ((f (hex-string->integer (first codepoints)))
|
||||
(l (hex-string->integer (last codepoints))))
|
||||
(l (hex-string->integer (last codepoints)))
|
||||
(width-prop (string->property prop-str comment)))
|
||||
|
||||
(when (or (in-surrogate-range f)
|
||||
(in-surrogate-range l))
|
||||
(error (format #f "chars in surrogate range ~x -> ~x" f l)))
|
||||
|
||||
(if (string-contains comment "COMBINING")
|
||||
(cons-ht! 'combining f l)
|
||||
(match width-prop
|
||||
((or "W" "F")
|
||||
(cons-ht! 'doublewidth f l))
|
||||
("H"
|
||||
(cons-ht! 'halfwidth f l))
|
||||
("Na"
|
||||
(cons-ht! 'narrow f l))
|
||||
("N"
|
||||
(cons-ht! 'neutral f l))
|
||||
("A"
|
||||
(cons-ht! 'ambiguous f l))))))
|
||||
(cons-hash-list! eastasian-ht width-prop f l)))
|
||||
#:unwind? #t)))))
|
||||
|
||||
(define ea-sets
|
||||
|
@ -113,7 +103,7 @@
|
|||
|
||||
(with-output-to-file file
|
||||
(λ ()
|
||||
(display ";; Code generated by script/generate. DO NOT EDIT\n\n")
|
||||
(format #t ";; Code generated by ~a. DO NOT EDIT\n\n" (first (command-line)))
|
||||
|
||||
(for-each process-east-asian-line (line-func))
|
||||
|
||||
|
@ -121,26 +111,13 @@
|
|||
`(define-module (runewidth eastasian)
|
||||
#:use-module (ice-9 hash-table)
|
||||
#:use-module (srfi srfi-1)
|
||||
#:use-module (runewidth internal)
|
||||
#:export
|
||||
,ea-symbol-names))
|
||||
|
||||
(pretty-print
|
||||
`(define chars-ht
|
||||
(alist->hashq-table ',(hash-map->list cons ea-chars-ht))))
|
||||
|
||||
(display "\n")
|
||||
|
||||
(pretty-print
|
||||
`(define-syntax-rule (ranges->charset! name symbol)
|
||||
(let* ((pairs (hashq-ref chars-ht name)))
|
||||
(for-each
|
||||
(λ (pair)
|
||||
(ucs-range->char-set!
|
||||
(first pair)
|
||||
;; Exclusive upper range, so add one
|
||||
(+ (second pair) 1)
|
||||
#t symbol))
|
||||
pairs))))
|
||||
`(define eastasian-ht
|
||||
(alist->hashq-table ',(hash-map->list cons eastasian-ht))))
|
||||
|
||||
(display "\n")
|
||||
|
||||
|
@ -156,10 +133,11 @@
|
|||
(λ (set-pair)
|
||||
(let ((name (first set-pair))
|
||||
(symbol (second set-pair)))
|
||||
(pretty-print
|
||||
`(ranges->charset! ',name ,symbol))))
|
||||
(pretty-print
|
||||
`(ranges->charset! eastasian-ht ',name ,symbol))))
|
||||
ea-sets-and-symbols)
|
||||
|
||||
(display "Code generation complete.\n" stdout)))
|
||||
|
||||
(format stdout "Written to ~a.\n" file)
|
||||
|
||||
|
|
|
@ -32,10 +32,6 @@
|
|||
(define (process-emoji-line line)
|
||||
(define tree (peg:tree (match-pattern @emoji-line line)))
|
||||
|
||||
(define (in-surrogate-range num)
|
||||
(and (>= num #xd800)
|
||||
(<= num #xdfff)))
|
||||
|
||||
(unless (or (not tree)
|
||||
(null? tree)
|
||||
(eq? '@comment (car tree)))
|
||||
|
@ -76,7 +72,7 @@
|
|||
|
||||
(with-output-to-file file
|
||||
(λ ()
|
||||
(display ";; Code generated by script/generate. DO NOT EDIT\n\n")
|
||||
(format #t ";; Code generated by ~a. DO NOT EDIT\n\n" (first (command-line)))
|
||||
|
||||
(for-each process-emoji-line (line-func))
|
||||
|
||||
|
|
145
scripts/generate-graphemes.in
Normal file
145
scripts/generate-graphemes.in
Normal file
|
@ -0,0 +1,145 @@
|
|||
#!@GUILE@ --no-auto-compile
|
||||
-*- scheme -*-
|
||||
!#
|
||||
|
||||
;; Can be called with a trailing argument pointing to the file on disk.
|
||||
|
||||
(use-modules
|
||||
(runewidth internal)
|
||||
(ice-9 pretty-print)
|
||||
(ice-9 peg)
|
||||
(ice-9 format)
|
||||
(ice-9 exceptions)
|
||||
(ice-9 match)
|
||||
(ice-9 hash-table)
|
||||
(srfi srfi-1))
|
||||
|
||||
(define stdout (current-output-port))
|
||||
|
||||
(define grapheme-url
|
||||
"https://www.unicode.org/Public/15.0.0/ucd/auxiliary/GraphemeBreakProperty.txt")
|
||||
|
||||
;; The break category name that follows the ";" on a data line, e.g.
;; "Extend" or "Regional_Indicator".  At least one character is required so
;; that the pattern cannot succeed on an empty category.
(define-peg-pattern @grapheme-category all
  (+ (peg "[a-zA-Z_]")))
|
||||
|
||||
(define-peg-pattern @grapheme-datum body
|
||||
(and @codepoint-range (* @ws) (ignore ";") (* @ws) @grapheme-category))
|
||||
|
||||
(define-peg-pattern @grapheme-line body
|
||||
(and @grapheme-datum (* @ws) @comment))
|
||||
|
||||
(define grapheme-ht (make-hash-table 13))
|
||||
|
||||
;; Category symbols for which a `char-set:grapheme-<name>' is exported and
;; populated in the generated module.  Every symbol that `string->category'
;; in `process-grapheme-line' can produce must be listed here; a category
;; that is missing is still collected into `grapheme-ht' but never becomes a
;; char-set.
(define grapheme-sets
  '(hangul-syllable-l
    hangul-syllable-v
    hangul-syllable-t
    hangul-syllable-lv
    hangul-syllable-lvt
    prepend
    carriage-return
    line-feed
    control
    extend
    regional-indicator
    spacing-mark
    zerowidth-joiner))
|
||||
|
||||
;; Names of the exported char-sets, one per entry of `grapheme-sets' and in
;; the same order: the category symbol prefixed with "char-set:grapheme-".
(define grapheme-symbol-names
  (map (λ (category)
         (string->symbol
          (string-append "char-set:grapheme-"
                         (symbol->string category))))
       grapheme-sets))

;; List of (category char-set-name) pairs, each a two-element list.
(define grapheme-sets-and-symbols
  (map list grapheme-sets grapheme-symbol-names))
|
||||
|
||||
;; Parse LINE with the @grapheme-line pattern and record its codepoint range
;; in `grapheme-ht' under the symbol for its category.
;;
;; Nothing happens when the parse yields no tree, an empty tree, or a tree
;; whose head is @comment.  Errors raised while converting a matched line
;; (bad hex, unknown category, surrogate codepoints) are reported on `stdout'
;; and the line is skipped.
(define (process-grapheme-line line)
  ;; Map the category name found in the data to the hash-table key.  A name
  ;; with no clause makes `match' raise.
  (define string->category
    (match-lambda
      ("L" 'hangul-syllable-l)
      ("V" 'hangul-syllable-v)
      ("T" 'hangul-syllable-t)
      ("LV" 'hangul-syllable-lv)
      ("LVT" 'hangul-syllable-lvt)
      ("Prepend" 'prepend)
      ("CR" 'carriage-return)
      ("LF" 'line-feed)
      ("Control" 'control)
      ("Extend" 'extend)
      ("Regional_Indicator" 'regional-indicator)
      ("SpacingMark" 'spacing-mark)
      ("ZWJ" 'zerowidth-joiner)))

  ;; Exception handler: say why the line is being dropped.
  (define (report-and-skip err)
    (format stdout "Skipping line due to error :: ")
    (format-exception-msg stdout err))

  (define tree (peg:tree (match-pattern @grapheme-line line)))

  (when (and tree
             (not (null? tree))
             (not (eq? '@comment (car tree))))
    (match tree
      (((('@codepoint-range ('@codepoint codepoints) ...)
         ('@grapheme-category cat-str))
        ('@comment _))
       (with-exception-handler
         report-and-skip
         (λ ()
           ;; A single codepoint gives first = last.
           (let* ((low (hex-string->integer (first codepoints)))
                  (high (hex-string->integer (last codepoints)))
                  (category (string->category cat-str)))
             (if (or (in-surrogate-range low)
                     (in-surrogate-range high))
                 (error (format #f "chars in surrogate range ~x -> ~x" low high)))
             (cons-hash-list! grapheme-ht category low high)))
         #:unwind? #t)))))
|
||||
|
||||
;; Thunk that produces the input lines.  When the script is run with exactly
;; one argument, that argument is read as a file on disk; otherwise the data
;; is fetched from `grapheme-url'.
(define line-func
  (let ((argv (command-line)))
    (cond
     ((= (length argv) 2)
      (λ () (file-to-lines (second argv) stdout)))
     (else
      (λ () (wget-to-lines grapheme-url stdout))))))
|
||||
|
||||
(define file "runewidth/graphemes.scm")
|
||||
|
||||
(format stdout "Writing to ~a...\n" file)
|
||||
|
||||
;; Write the generated module to FILE.  Everything sent to the current output
;; port inside the thunk lands in FILE; progress and error messages use
;; `stdout', the port bound at the top of the script, so they do not end up
;; in the generated code.
(with-output-to-file file
  (λ ()
    (format #t ";; Code generated by ~a. DO NOT EDIT\n\n" (first (command-line)))

    ;; Fill grapheme-ht from the input before anything that depends on it is
    ;; printed.
    (for-each process-grapheme-line (line-func))

    (pretty-print
     `(define-module (runewidth graphemes)
        #:use-module (ice-9 hash-table)
        #:use-module (srfi srfi-1)
        #:use-module (runewidth internal)
        #:export ,grapheme-symbol-names))

    (pretty-print
     `(define grapheme-ht
        (alist->hashq-table ',(hash-map->list cons grapheme-ht))))

    (display "\n")

    ;; Define every exported char-set as an empty set.  Without these forms
    ;; the generated module would export, and `ranges->charset!' would
    ;; reference, variables that are never bound.
    (for-each
     (λ (symbol)
       (pretty-print `(define ,symbol (char-set))))
     grapheme-symbol-names)

    (display "\n")

    ;; Populate each char-set from the ranges recorded for its category.
    (for-each
     (λ (set-pair)
       (let ((name (first set-pair))
             (symbol (second set-pair)))
         (pretty-print
          `(ranges->charset! grapheme-ht ',name ,symbol))))
     grapheme-sets-and-symbols)

    (display "Code generation complete.\n" stdout)))
|
||||
|
||||
(format stdout "Written to ~a.\n" file)
|
Loading…
Reference in a new issue