diff --git a/guix.scm b/guix.scm index a07615a..b2406e8 100644 --- a/guix.scm +++ b/guix.scm @@ -80,7 +80,9 @@ "GUILE_LOAD_COMPILED_PATH" (compiled-dir out version) (compiled-dir "" version)))) - ,''("generate-east-asian" "generate-emoji")) + ,''("generate-east-asian" + "generate-emoji" + "generate-graphemes")) #t)))))))) (native-inputs (list autoconf automake pkg-config texinfo)) diff --git a/hall.scm b/hall.scm index 23d8ca1..ce1b7e5 100644 --- a/hall.scm +++ b/hall.scm @@ -25,6 +25,7 @@ ((scheme-file "emoji") (directory "eastasian" ((scheme-file "locale"))) (scheme-file "eastasian") + (scheme-file "graphemes") (scheme-file "internal"))))) (tests ((directory "tests" @@ -33,7 +34,8 @@ ((directory "scripts" ((in-file "generate-east-asian") - (in-file "generate-emoji"))))) + (in-file "generate-emoji") + (in-file "generate-graphemes"))))) (documentation ((org-file "README") (symlink "README" "README.org") diff --git a/runewidth/eastasian.scm b/runewidth/eastasian.scm index d8c4623..d1e2cb5 100644 --- a/runewidth/eastasian.scm +++ b/runewidth/eastasian.scm @@ -1,4 +1,4 @@ -;; Code generated by script/generate. DO NOT EDIT +;; Code generated by scripts/generate-east-asian. 
DO NOT EDIT (define-module (runewidth eastasian) @@ -6,6 +6,8 @@ (ice-9 hash-table) #:use-module (srfi srfi-1) + #:use-module + (runewidth internal) #:export (char-set:eastasian-combining char-set:eastasian-doublewidth @@ -13,285 +15,61 @@ char-set:eastasian-narrow char-set:eastasian-neutral char-set:eastasian-ambiguous)) -(define chars-ht +(define eastasian-ht (alist->hashq-table - '((doublewidth - (201547 262141) - (196608 201546) - (195104 196605) - (195102 195103) - (194560 195101) - (191457 194559) - (183984 191456) - (183970 183983) - (178208 183969) - (178206 178207) - (177984 178205) - (177973 177983) - (173824 177972) - (173790 173823) - (131072 173789) - (129744 129750) - (129728 129730) - (129712 129718) - (129680 129704) - (129664 129670) - (129656 129658) - (129648 129652) - (129485 129535) - (129402 129483) - (129351 129400) - (129340 129349) - (129292 129338) - (128992 129003) - (128756 128764) - (128747 128748) - (128725 128727) - (128720 128722) - (128716 128716) - (128640 128709) - (128512 128591) - (128507 128511) - (128420 128420) - (128405 128406) - (128378 128378) - (128336 128359) - (128331 128334) - (128255 128317) - (128066 128252) - (128064 128064) - (128000 128062) - (127995 127999) - (127992 127994) - (127988 127988) - (127968 127984) - (127951 127955) - (127904 127946) - (127870 127891) - (127799 127868) - (127789 127797) - (127744 127776) - (127584 127589) - (127568 127569) - (127552 127560) - (127504 127547) - (127488 127490) - (127377 127386) - (127374 127374) - (127183 127183) - (126980 126980) - (110960 111355) - (110948 110951) - (110928 110930) - (110848 110878) - (110592 110847) - (101632 101640) - (101120 101589) - (100352 101119) - (94208 100343) - (94192 94193) - (94180 94180) - (94179 94179) - (94178 94178) - (94176 94177) - (65509 65510) - (65508 65508) - (65507 65507) - (65506 65506) - (65504 65505) - (65376 65376) - (65375 65375) - (65374 65374) - (65373 65373) - (65372 65372) - (65371 65371) - (65345 65370) - (65344 
65344) - (65343 65343) - (65342 65342) - (65341 65341) - (65340 65340) - (65339 65339) - (65313 65338) - (65311 65312) - (65308 65310) - (65306 65307) - (65296 65305) - (65294 65295) - (65293 65293) - (65292 65292) - (65291 65291) - (65290 65290) - (65289 65289) - (65288 65288) - (65285 65287) - (65284 65284) - (65281 65283) - (65130 65131) - (65129 65129) - (65128 65128) - (65124 65126) - (65123 65123) - (65122 65122) - (65119 65121) - (65118 65118) - (65117 65117) - (65116 65116) - (65115 65115) - (65114 65114) - (65113 65113) - (65112 65112) - (65108 65111) - (65104 65106) - (65101 65103) - (65097 65100) - (65096 65096) - (65095 65095) - (65093 65094) - (65092 65092) - (65091 65091) - (65090 65090) - (65089 65089) - (65088 65088) - (65087 65087) - (65086 65086) - (65085 65085) - (65084 65084) - (65083 65083) - (65082 65082) - (65081 65081) - (65080 65080) - (65079 65079) - (65078 65078) - (65077 65077) - (65075 65076) - (65073 65074) - (65072 65072) - (65049 65049) - (65048 65048) - (65047 65047) - (65040 65046) - (64218 64255) - (64112 64217) - (64110 64111) - (63744 64109) - (44032 55203) - (43360 43388) - (42128 42182) - (40982 42124) - (40981 40981) - (40960 40980) - (40957 40959) - (19968 40956) - (13312 19903) - (13056 13311) - (12992 13055) - (12977 12991) - (12938 12976) - (12928 12937) - (12896 12927) - (12881 12895) - (12880 12880) - (12842 12871) - (12832 12841) - (12800 12830) - (12784 12799) - (12736 12771) - (12704 12735) - (12694 12703) - (12690 12693) - (12688 12689) - (12593 12686) - (12549 12591) - (12543 12543) - (12540 12542) - (12539 12539) - (12449 12538) - (12448 12448) - (12447 12447) - (12445 12446) - (12443 12444) - (12353 12438) - (12350 12350) - (12349 12349) - (12348 12348) - (12347 12347) - (12344 12346) - (12342 12343) - (12337 12341) - (12336 12336) - (12334 12335) - (12330 12333) - (12321 12329) - (12320 12320) - (12318 12319) - (12317 12317) - (12316 12316) - (12315 12315) - (12314 12314) - (12313 12313) - (12312 12312) - (12311 
12311) - (12310 12310) - (12309 12309) - (12308 12308) - (12306 12307) - (12305 12305) - (12304 12304) - (12303 12303) - (12302 12302) - (12301 12301) - (12300 12300) - (12299 12299) - (12298 12298) - (12297 12297) - (12296 12296) - (12295 12295) - (12294 12294) - (12293 12293) - (12292 12292) - (12289 12291) - (12288 12288) - (12272 12283) - (12032 12245) - (11931 12019) - (11904 11929) - (11093 11093) - (11088 11088) - (11035 11036) - (10175 10175) - (10160 10160) - (10133 10135) - (10071 10071) - (10067 10069) - (10062 10062) - (10060 10060) - (10024 10024) - (9994 9995) - (9989 9989) - (9981 9981) - (9978 9978) - (9973 9973) - (9970 9971) - (9962 9962) - (9940 9940) - (9934 9934) - (9924 9925) - (9917 9918) - (9898 9899) - (9889 9889) - (9875 9875) - (9855 9855) - (9800 9811) - (9748 9749) - (9725 9726) - (9203 9203) - (9200 9200) - (9193 9196) - (9002 9002) - (9001 9001) - (8986 8987) - (4352 4447)) + '((combining + (125136 125142) + (122918 122922) + (122915 122916) + (122907 122913) + (122888 122904) + (122880 122886) + (119362 119364) + (119210 119213) + (119173 119179) + (119163 119170) + (119149 119154) + (119143 119145) + (119141 119142) + (92912 92916) + (70512 70516) + (70502 70508) + (70459 70460) + (70400 70401) + (69446 69456) + (69291 69292) + (66422 66426) + (66045 66045) + (65056 65071) + (43232 43249) + (42736 42737) + (42654 42655) + (42612 42621) + (42608 42610) + (42607 42607) + (12441 12442) + (11744 11775) + (11503 11505) + (8421 8432) + (8418 8420) + (8417 8417) + (8413 8416) + (8400 8412) + (7675 7679) + (7616 7673) + (7019 7027) + (6847 6848) + (6846 6846) + (6832 6845) + (6783 6783) + (4957 4959) + (3328 3329) + (3076 3076) + (3072 3072) + (2027 2035) + (1160 1161) + (1155 1159) + (768 879)) (ambiguous (1048576 1114109) (983040 1048573) @@ -490,59 +268,67 @@ (167 167) (164 164) (161 161)) - (combining - (125136 125142) - (122918 122922) - (122915 122916) - (122907 122913) - (122888 122904) - (122880 122886) - (119362 119364) - (119210 
119213) - (119173 119179) - (119163 119170) - (119149 119154) - (119143 119145) - (119141 119142) - (92912 92916) - (70512 70516) - (70502 70508) - (70459 70460) - (70400 70401) - (69446 69456) - (69291 69292) - (66422 66426) - (66045 66045) - (65056 65071) - (43232 43249) - (42736 42737) - (42654 42655) - (42612 42621) - (42608 42610) - (42607 42607) - (12441 12442) - (11744 11775) - (11503 11505) - (8421 8432) - (8418 8420) - (8417 8417) - (8413 8416) - (8400 8412) - (7675 7679) - (7616 7673) - (7019 7027) - (6847 6848) - (6846 6846) - (6832 6845) - (6783 6783) - (4957 4959) - (3328 3329) - (3076 3076) - (3072 3072) - (2027 2035) - (1160 1161) - (1155 1159) - (768 879)) + (halfwidth + (65517 65518) + (65513 65516) + (65512 65512) + (65498 65500) + (65490 65495) + (65482 65487) + (65474 65479) + (65440 65470) + (65438 65439) + (65393 65437) + (65392 65392) + (65382 65391) + (65380 65381) + (65379 65379) + (65378 65378) + (65377 65377) + (8361 8361)) + (narrow + (10630 10630) + (10629 10629) + (10221 10221) + (10220 10220) + (10219 10219) + (10218 10218) + (10217 10217) + (10216 10216) + (10215 10215) + (10214 10214) + (175 175) + (172 172) + (166 166) + (165 165) + (162 163) + (126 126) + (125 125) + (124 124) + (123 123) + (97 122) + (96 96) + (95 95) + (94 94) + (93 93) + (92 92) + (91 91) + (65 90) + (63 64) + (60 62) + (58 59) + (48 57) + (46 47) + (45 45) + (44 44) + (43 43) + (42 42) + (41 41) + (40 40) + (37 39) + (36 36) + (33 35) + (32 32)) (neutral (917536 917631) (917505 917505) @@ -2435,79 +2221,283 @@ (128 159) (127 127) (0 31)) - (narrow - (10630 10630) - (10629 10629) - (10221 10221) - (10220 10220) - (10219 10219) - (10218 10218) - (10217 10217) - (10216 10216) - (10215 10215) - (10214 10214) - (175 175) - (172 172) - (166 166) - (165 165) - (162 163) - (126 126) - (125 125) - (124 124) - (123 123) - (97 122) - (96 96) - (95 95) - (94 94) - (93 93) - (92 92) - (91 91) - (65 90) - (63 64) - (60 62) - (58 59) - (48 57) - (46 47) - (45 45) - (44 44) - 
(43 43) - (42 42) - (41 41) - (40 40) - (37 39) - (36 36) - (33 35) - (32 32)) - (halfwidth - (65517 65518) - (65513 65516) - (65512 65512) - (65498 65500) - (65490 65495) - (65482 65487) - (65474 65479) - (65440 65470) - (65438 65439) - (65393 65437) - (65392 65392) - (65382 65391) - (65380 65381) - (65379 65379) - (65378 65378) - (65377 65377) - (8361 8361))))) - -(define-syntax-rule - (ranges->charset! name symbol) - (let* ((pairs (hashq-ref chars-ht name))) - (for-each - (λ (pair) - (ucs-range->char-set! - (first pair) - (+ (second pair) 1) - #t - symbol)) - pairs))) + (doublewidth + (201547 262141) + (196608 201546) + (195104 196605) + (195102 195103) + (194560 195101) + (191457 194559) + (183984 191456) + (183970 183983) + (178208 183969) + (178206 178207) + (177984 178205) + (177973 177983) + (173824 177972) + (173790 173823) + (131072 173789) + (129744 129750) + (129728 129730) + (129712 129718) + (129680 129704) + (129664 129670) + (129656 129658) + (129648 129652) + (129485 129535) + (129402 129483) + (129351 129400) + (129340 129349) + (129292 129338) + (128992 129003) + (128756 128764) + (128747 128748) + (128725 128727) + (128720 128722) + (128716 128716) + (128640 128709) + (128512 128591) + (128507 128511) + (128420 128420) + (128405 128406) + (128378 128378) + (128336 128359) + (128331 128334) + (128255 128317) + (128066 128252) + (128064 128064) + (128000 128062) + (127995 127999) + (127992 127994) + (127988 127988) + (127968 127984) + (127951 127955) + (127904 127946) + (127870 127891) + (127799 127868) + (127789 127797) + (127744 127776) + (127584 127589) + (127568 127569) + (127552 127560) + (127504 127547) + (127488 127490) + (127377 127386) + (127374 127374) + (127183 127183) + (126980 126980) + (110960 111355) + (110948 110951) + (110928 110930) + (110848 110878) + (110592 110847) + (101632 101640) + (101120 101589) + (100352 101119) + (94208 100343) + (94192 94193) + (94180 94180) + (94179 94179) + (94178 94178) + (94176 94177) + (65509 
65510) + (65508 65508) + (65507 65507) + (65506 65506) + (65504 65505) + (65376 65376) + (65375 65375) + (65374 65374) + (65373 65373) + (65372 65372) + (65371 65371) + (65345 65370) + (65344 65344) + (65343 65343) + (65342 65342) + (65341 65341) + (65340 65340) + (65339 65339) + (65313 65338) + (65311 65312) + (65308 65310) + (65306 65307) + (65296 65305) + (65294 65295) + (65293 65293) + (65292 65292) + (65291 65291) + (65290 65290) + (65289 65289) + (65288 65288) + (65285 65287) + (65284 65284) + (65281 65283) + (65130 65131) + (65129 65129) + (65128 65128) + (65124 65126) + (65123 65123) + (65122 65122) + (65119 65121) + (65118 65118) + (65117 65117) + (65116 65116) + (65115 65115) + (65114 65114) + (65113 65113) + (65112 65112) + (65108 65111) + (65104 65106) + (65101 65103) + (65097 65100) + (65096 65096) + (65095 65095) + (65093 65094) + (65092 65092) + (65091 65091) + (65090 65090) + (65089 65089) + (65088 65088) + (65087 65087) + (65086 65086) + (65085 65085) + (65084 65084) + (65083 65083) + (65082 65082) + (65081 65081) + (65080 65080) + (65079 65079) + (65078 65078) + (65077 65077) + (65075 65076) + (65073 65074) + (65072 65072) + (65049 65049) + (65048 65048) + (65047 65047) + (65040 65046) + (64218 64255) + (64112 64217) + (64110 64111) + (63744 64109) + (44032 55203) + (43360 43388) + (42128 42182) + (40982 42124) + (40981 40981) + (40960 40980) + (40957 40959) + (19968 40956) + (13312 19903) + (13056 13311) + (12992 13055) + (12977 12991) + (12938 12976) + (12928 12937) + (12896 12927) + (12881 12895) + (12880 12880) + (12842 12871) + (12832 12841) + (12800 12830) + (12784 12799) + (12736 12771) + (12704 12735) + (12694 12703) + (12690 12693) + (12688 12689) + (12593 12686) + (12549 12591) + (12543 12543) + (12540 12542) + (12539 12539) + (12449 12538) + (12448 12448) + (12447 12447) + (12445 12446) + (12443 12444) + (12353 12438) + (12350 12350) + (12349 12349) + (12348 12348) + (12347 12347) + (12344 12346) + (12342 12343) + (12337 12341) + (12336 
12336) + (12334 12335) + (12330 12333) + (12321 12329) + (12320 12320) + (12318 12319) + (12317 12317) + (12316 12316) + (12315 12315) + (12314 12314) + (12313 12313) + (12312 12312) + (12311 12311) + (12310 12310) + (12309 12309) + (12308 12308) + (12306 12307) + (12305 12305) + (12304 12304) + (12303 12303) + (12302 12302) + (12301 12301) + (12300 12300) + (12299 12299) + (12298 12298) + (12297 12297) + (12296 12296) + (12295 12295) + (12294 12294) + (12293 12293) + (12292 12292) + (12289 12291) + (12288 12288) + (12272 12283) + (12032 12245) + (11931 12019) + (11904 11929) + (11093 11093) + (11088 11088) + (11035 11036) + (10175 10175) + (10160 10160) + (10133 10135) + (10071 10071) + (10067 10069) + (10062 10062) + (10060 10060) + (10024 10024) + (9994 9995) + (9989 9989) + (9981 9981) + (9978 9978) + (9973 9973) + (9970 9971) + (9962 9962) + (9940 9940) + (9934 9934) + (9924 9925) + (9917 9918) + (9898 9899) + (9889 9889) + (9875 9875) + (9855 9855) + (9800 9811) + (9748 9749) + (9725 9726) + (9203 9203) + (9200 9200) + (9193 9196) + (9002 9002) + (9001 9001) + (8986 8987) + (4352 4447))))) (define char-set:eastasian-combining (char-set)) (define char-set:eastasian-doublewidth @@ -2518,20 +2508,26 @@ (define char-set:eastasian-ambiguous (char-set)) (ranges->charset! + eastasian-ht 'combining char-set:eastasian-combining) (ranges->charset! + eastasian-ht 'doublewidth char-set:eastasian-doublewidth) (ranges->charset! + eastasian-ht 'halfwidth char-set:eastasian-halfwidth) (ranges->charset! + eastasian-ht 'narrow char-set:eastasian-narrow) (ranges->charset! + eastasian-ht 'neutral char-set:eastasian-neutral) (ranges->charset! + eastasian-ht 'ambiguous char-set:eastasian-ambiguous) diff --git a/runewidth/emoji.scm b/runewidth/emoji.scm index f198c56..6a433f6 100644 --- a/runewidth/emoji.scm +++ b/runewidth/emoji.scm @@ -1,4 +1,4 @@ -;; Code generated by script/generate. DO NOT EDIT +;; Code generated by scripts/generate-emoji. 
DO NOT EDIT (define-module (runewidth emoji) diff --git a/runewidth/graphemes.scm b/runewidth/graphemes.scm new file mode 100644 index 0000000..cffda53 --- /dev/null +++ b/runewidth/graphemes.scm @@ -0,0 +1,1468 @@ +;; Code generated by scripts/generate-graphemes. DO NOT EDIT + +(define-module + (runewidth graphemes) + #:use-module + (ice-9 hash-table) + #:use-module + (srfi srfi-1) + #:use-module + (runewidth internal) + #:export + (char-set:grapheme-hangul-syllable-l + char-set:grapheme-hangul-syllable-v + char-set:grapheme-hangul-syllable-lv + char-set:grapheme-hangul-syllable-lvt + char-set:grapheme-prepend + char-set:grapheme-carriage-return + char-set:grapheme-line-feed + char-set:grapheme-control + char-set:grapheme-extend + char-set:grapheme-regional-indicator + char-set:grapheme-spacing-mark + char-set:grapheme-zerowidth-joiner)) +(define grapheme-ht + (alist->hashq-table + '((regional-indicator (127462 127487)) + (hangul-syllable-lv + (55176 55176) + (55148 55148) + (55120 55120) + (55092 55092) + (55064 55064) + (55036 55036) + (55008 55008) + (54980 54980) + (54952 54952) + (54924 54924) + (54896 54896) + (54868 54868) + (54840 54840) + (54812 54812) + (54784 54784) + (54756 54756) + (54728 54728) + (54700 54700) + (54672 54672) + (54644 54644) + (54616 54616) + (54588 54588) + (54560 54560) + (54532 54532) + (54504 54504) + (54476 54476) + (54448 54448) + (54420 54420) + (54392 54392) + (54364 54364) + (54336 54336) + (54308 54308) + (54280 54280) + (54252 54252) + (54224 54224) + (54196 54196) + (54168 54168) + (54140 54140) + (54112 54112) + (54084 54084) + (54056 54056) + (54028 54028) + (54000 54000) + (53972 53972) + (53944 53944) + (53916 53916) + (53888 53888) + (53860 53860) + (53832 53832) + (53804 53804) + (53776 53776) + (53748 53748) + (53720 53720) + (53692 53692) + (53664 53664) + (53636 53636) + (53608 53608) + (53580 53580) + (53552 53552) + (53524 53524) + (53496 53496) + (53468 53468) + (53440 53440) + (53412 53412) + (53384 53384) + 
(53356 53356) + (53328 53328) + (53300 53300) + (53272 53272) + (53244 53244) + (53216 53216) + (53188 53188) + (53160 53160) + (53132 53132) + (53104 53104) + (53076 53076) + (53048 53048) + (53020 53020) + (52992 52992) + (52964 52964) + (52936 52936) + (52908 52908) + (52880 52880) + (52852 52852) + (52824 52824) + (52796 52796) + (52768 52768) + (52740 52740) + (52712 52712) + (52684 52684) + (52656 52656) + (52628 52628) + (52600 52600) + (52572 52572) + (52544 52544) + (52516 52516) + (52488 52488) + (52460 52460) + (52432 52432) + (52404 52404) + (52376 52376) + (52348 52348) + (52320 52320) + (52292 52292) + (52264 52264) + (52236 52236) + (52208 52208) + (52180 52180) + (52152 52152) + (52124 52124) + (52096 52096) + (52068 52068) + (52040 52040) + (52012 52012) + (51984 51984) + (51956 51956) + (51928 51928) + (51900 51900) + (51872 51872) + (51844 51844) + (51816 51816) + (51788 51788) + (51760 51760) + (51732 51732) + (51704 51704) + (51676 51676) + (51648 51648) + (51620 51620) + (51592 51592) + (51564 51564) + (51536 51536) + (51508 51508) + (51480 51480) + (51452 51452) + (51424 51424) + (51396 51396) + (51368 51368) + (51340 51340) + (51312 51312) + (51284 51284) + (51256 51256) + (51228 51228) + (51200 51200) + (51172 51172) + (51144 51144) + (51116 51116) + (51088 51088) + (51060 51060) + (51032 51032) + (51004 51004) + (50976 50976) + (50948 50948) + (50920 50920) + (50892 50892) + (50864 50864) + (50836 50836) + (50808 50808) + (50780 50780) + (50752 50752) + (50724 50724) + (50696 50696) + (50668 50668) + (50640 50640) + (50612 50612) + (50584 50584) + (50556 50556) + (50528 50528) + (50500 50500) + (50472 50472) + (50444 50444) + (50416 50416) + (50388 50388) + (50360 50360) + (50332 50332) + (50304 50304) + (50276 50276) + (50248 50248) + (50220 50220) + (50192 50192) + (50164 50164) + (50136 50136) + (50108 50108) + (50080 50080) + (50052 50052) + (50024 50024) + (49996 49996) + (49968 49968) + (49940 49940) + (49912 49912) + (49884 49884) + 
(49856 49856) + (49828 49828) + (49800 49800) + (49772 49772) + (49744 49744) + (49716 49716) + (49688 49688) + (49660 49660) + (49632 49632) + (49604 49604) + (49576 49576) + (49548 49548) + (49520 49520) + (49492 49492) + (49464 49464) + (49436 49436) + (49408 49408) + (49380 49380) + (49352 49352) + (49324 49324) + (49296 49296) + (49268 49268) + (49240 49240) + (49212 49212) + (49184 49184) + (49156 49156) + (49128 49128) + (49100 49100) + (49072 49072) + (49044 49044) + (49016 49016) + (48988 48988) + (48960 48960) + (48932 48932) + (48904 48904) + (48876 48876) + (48848 48848) + (48820 48820) + (48792 48792) + (48764 48764) + (48736 48736) + (48708 48708) + (48680 48680) + (48652 48652) + (48624 48624) + (48596 48596) + (48568 48568) + (48540 48540) + (48512 48512) + (48484 48484) + (48456 48456) + (48428 48428) + (48400 48400) + (48372 48372) + (48344 48344) + (48316 48316) + (48288 48288) + (48260 48260) + (48232 48232) + (48204 48204) + (48176 48176) + (48148 48148) + (48120 48120) + (48092 48092) + (48064 48064) + (48036 48036) + (48008 48008) + (47980 47980) + (47952 47952) + (47924 47924) + (47896 47896) + (47868 47868) + (47840 47840) + (47812 47812) + (47784 47784) + (47756 47756) + (47728 47728) + (47700 47700) + (47672 47672) + (47644 47644) + (47616 47616) + (47588 47588) + (47560 47560) + (47532 47532) + (47504 47504) + (47476 47476) + (47448 47448) + (47420 47420) + (47392 47392) + (47364 47364) + (47336 47336) + (47308 47308) + (47280 47280) + (47252 47252) + (47224 47224) + (47196 47196) + (47168 47168) + (47140 47140) + (47112 47112) + (47084 47084) + (47056 47056) + (47028 47028) + (47000 47000) + (46972 46972) + (46944 46944) + (46916 46916) + (46888 46888) + (46860 46860) + (46832 46832) + (46804 46804) + (46776 46776) + (46748 46748) + (46720 46720) + (46692 46692) + (46664 46664) + (46636 46636) + (46608 46608) + (46580 46580) + (46552 46552) + (46524 46524) + (46496 46496) + (46468 46468) + (46440 46440) + (46412 46412) + (46384 46384) + 
(46356 46356) + (46328 46328) + (46300 46300) + (46272 46272) + (46244 46244) + (46216 46216) + (46188 46188) + (46160 46160) + (46132 46132) + (46104 46104) + (46076 46076) + (46048 46048) + (46020 46020) + (45992 45992) + (45964 45964) + (45936 45936) + (45908 45908) + (45880 45880) + (45852 45852) + (45824 45824) + (45796 45796) + (45768 45768) + (45740 45740) + (45712 45712) + (45684 45684) + (45656 45656) + (45628 45628) + (45600 45600) + (45572 45572) + (45544 45544) + (45516 45516) + (45488 45488) + (45460 45460) + (45432 45432) + (45404 45404) + (45376 45376) + (45348 45348) + (45320 45320) + (45292 45292) + (45264 45264) + (45236 45236) + (45208 45208) + (45180 45180) + (45152 45152) + (45124 45124) + (45096 45096) + (45068 45068) + (45040 45040) + (45012 45012) + (44984 44984) + (44956 44956) + (44928 44928) + (44900 44900) + (44872 44872) + (44844 44844) + (44816 44816) + (44788 44788) + (44760 44760) + (44732 44732) + (44704 44704) + (44676 44676) + (44648 44648) + (44620 44620) + (44592 44592) + (44564 44564) + (44536 44536) + (44508 44508) + (44480 44480) + (44452 44452) + (44424 44424) + (44396 44396) + (44368 44368) + (44340 44340) + (44312 44312) + (44284 44284) + (44256 44256) + (44228 44228) + (44200 44200) + (44172 44172) + (44144 44144) + (44116 44116) + (44088 44088) + (44060 44060) + (44032 44032)) + (hangul-syllable-lvt + (55177 55203) + (55149 55175) + (55121 55147) + (55093 55119) + (55065 55091) + (55037 55063) + (55009 55035) + (54981 55007) + (54953 54979) + (54925 54951) + (54897 54923) + (54869 54895) + (54841 54867) + (54813 54839) + (54785 54811) + (54757 54783) + (54729 54755) + (54701 54727) + (54673 54699) + (54645 54671) + (54617 54643) + (54589 54615) + (54561 54587) + (54533 54559) + (54505 54531) + (54477 54503) + (54449 54475) + (54421 54447) + (54393 54419) + (54365 54391) + (54337 54363) + (54309 54335) + (54281 54307) + (54253 54279) + (54225 54251) + (54197 54223) + (54169 54195) + (54141 54167) + (54113 54139) + (54085 
54111) + (54057 54083) + (54029 54055) + (54001 54027) + (53973 53999) + (53945 53971) + (53917 53943) + (53889 53915) + (53861 53887) + (53833 53859) + (53805 53831) + (53777 53803) + (53749 53775) + (53721 53747) + (53693 53719) + (53665 53691) + (53637 53663) + (53609 53635) + (53581 53607) + (53553 53579) + (53525 53551) + (53497 53523) + (53469 53495) + (53441 53467) + (53413 53439) + (53385 53411) + (53357 53383) + (53329 53355) + (53301 53327) + (53273 53299) + (53245 53271) + (53217 53243) + (53189 53215) + (53161 53187) + (53133 53159) + (53105 53131) + (53077 53103) + (53049 53075) + (53021 53047) + (52993 53019) + (52965 52991) + (52937 52963) + (52909 52935) + (52881 52907) + (52853 52879) + (52825 52851) + (52797 52823) + (52769 52795) + (52741 52767) + (52713 52739) + (52685 52711) + (52657 52683) + (52629 52655) + (52601 52627) + (52573 52599) + (52545 52571) + (52517 52543) + (52489 52515) + (52461 52487) + (52433 52459) + (52405 52431) + (52377 52403) + (52349 52375) + (52321 52347) + (52293 52319) + (52265 52291) + (52237 52263) + (52209 52235) + (52181 52207) + (52153 52179) + (52125 52151) + (52097 52123) + (52069 52095) + (52041 52067) + (52013 52039) + (51985 52011) + (51957 51983) + (51929 51955) + (51901 51927) + (51873 51899) + (51845 51871) + (51817 51843) + (51789 51815) + (51761 51787) + (51733 51759) + (51705 51731) + (51677 51703) + (51649 51675) + (51621 51647) + (51593 51619) + (51565 51591) + (51537 51563) + (51509 51535) + (51481 51507) + (51453 51479) + (51425 51451) + (51397 51423) + (51369 51395) + (51341 51367) + (51313 51339) + (51285 51311) + (51257 51283) + (51229 51255) + (51201 51227) + (51173 51199) + (51145 51171) + (51117 51143) + (51089 51115) + (51061 51087) + (51033 51059) + (51005 51031) + (50977 51003) + (50949 50975) + (50921 50947) + (50893 50919) + (50865 50891) + (50837 50863) + (50809 50835) + (50781 50807) + (50753 50779) + (50725 50751) + (50697 50723) + (50669 50695) + (50641 50667) + (50613 50639) + (50585 
50611) + (50557 50583) + (50529 50555) + (50501 50527) + (50473 50499) + (50445 50471) + (50417 50443) + (50389 50415) + (50361 50387) + (50333 50359) + (50305 50331) + (50277 50303) + (50249 50275) + (50221 50247) + (50193 50219) + (50165 50191) + (50137 50163) + (50109 50135) + (50081 50107) + (50053 50079) + (50025 50051) + (49997 50023) + (49969 49995) + (49941 49967) + (49913 49939) + (49885 49911) + (49857 49883) + (49829 49855) + (49801 49827) + (49773 49799) + (49745 49771) + (49717 49743) + (49689 49715) + (49661 49687) + (49633 49659) + (49605 49631) + (49577 49603) + (49549 49575) + (49521 49547) + (49493 49519) + (49465 49491) + (49437 49463) + (49409 49435) + (49381 49407) + (49353 49379) + (49325 49351) + (49297 49323) + (49269 49295) + (49241 49267) + (49213 49239) + (49185 49211) + (49157 49183) + (49129 49155) + (49101 49127) + (49073 49099) + (49045 49071) + (49017 49043) + (48989 49015) + (48961 48987) + (48933 48959) + (48905 48931) + (48877 48903) + (48849 48875) + (48821 48847) + (48793 48819) + (48765 48791) + (48737 48763) + (48709 48735) + (48681 48707) + (48653 48679) + (48625 48651) + (48597 48623) + (48569 48595) + (48541 48567) + (48513 48539) + (48485 48511) + (48457 48483) + (48429 48455) + (48401 48427) + (48373 48399) + (48345 48371) + (48317 48343) + (48289 48315) + (48261 48287) + (48233 48259) + (48205 48231) + (48177 48203) + (48149 48175) + (48121 48147) + (48093 48119) + (48065 48091) + (48037 48063) + (48009 48035) + (47981 48007) + (47953 47979) + (47925 47951) + (47897 47923) + (47869 47895) + (47841 47867) + (47813 47839) + (47785 47811) + (47757 47783) + (47729 47755) + (47701 47727) + (47673 47699) + (47645 47671) + (47617 47643) + (47589 47615) + (47561 47587) + (47533 47559) + (47505 47531) + (47477 47503) + (47449 47475) + (47421 47447) + (47393 47419) + (47365 47391) + (47337 47363) + (47309 47335) + (47281 47307) + (47253 47279) + (47225 47251) + (47197 47223) + (47169 47195) + (47141 47167) + (47113 47139) + (47085 
47111) + (47057 47083) + (47029 47055) + (47001 47027) + (46973 46999) + (46945 46971) + (46917 46943) + (46889 46915) + (46861 46887) + (46833 46859) + (46805 46831) + (46777 46803) + (46749 46775) + (46721 46747) + (46693 46719) + (46665 46691) + (46637 46663) + (46609 46635) + (46581 46607) + (46553 46579) + (46525 46551) + (46497 46523) + (46469 46495) + (46441 46467) + (46413 46439) + (46385 46411) + (46357 46383) + (46329 46355) + (46301 46327) + (46273 46299) + (46245 46271) + (46217 46243) + (46189 46215) + (46161 46187) + (46133 46159) + (46105 46131) + (46077 46103) + (46049 46075) + (46021 46047) + (45993 46019) + (45965 45991) + (45937 45963) + (45909 45935) + (45881 45907) + (45853 45879) + (45825 45851) + (45797 45823) + (45769 45795) + (45741 45767) + (45713 45739) + (45685 45711) + (45657 45683) + (45629 45655) + (45601 45627) + (45573 45599) + (45545 45571) + (45517 45543) + (45489 45515) + (45461 45487) + (45433 45459) + (45405 45431) + (45377 45403) + (45349 45375) + (45321 45347) + (45293 45319) + (45265 45291) + (45237 45263) + (45209 45235) + (45181 45207) + (45153 45179) + (45125 45151) + (45097 45123) + (45069 45095) + (45041 45067) + (45013 45039) + (44985 45011) + (44957 44983) + (44929 44955) + (44901 44927) + (44873 44899) + (44845 44871) + (44817 44843) + (44789 44815) + (44761 44787) + (44733 44759) + (44705 44731) + (44677 44703) + (44649 44675) + (44621 44647) + (44593 44619) + (44565 44591) + (44537 44563) + (44509 44535) + (44481 44507) + (44453 44479) + (44425 44451) + (44397 44423) + (44369 44395) + (44341 44367) + (44313 44339) + (44285 44311) + (44257 44283) + (44229 44255) + (44201 44227) + (44173 44199) + (44145 44171) + (44117 44143) + (44089 44115) + (44061 44087) + (44033 44059)) + (prepend + (73474 73474) + (73030 73030) + (72324 72329) + (72250 72250) + (72001 72001) + (71999 71999) + (70082 70083) + (69837 69837) + (69821 69821) + (3406 3406) + (2274 2274) + (2192 2193) + (1807 1807) + (1757 1757) + (1536 1541)) + 
(hangul-syllable-l (43360 43388) (4352 4447)) + (hangul-syllable-v (55216 55238) (4448 4519)) + (spacing-mark + (119149 119149) + (119142 119142) + (94192 94193) + (94033 94087) + (73537 73537) + (73534 73535) + (73524 73525) + (73475 73475) + (73461 73462) + (73110 73110) + (73107 73108) + (73098 73102) + (72884 72884) + (72881 72881) + (72873 72873) + (72766 72766) + (72751 72751) + (72343 72343) + (72279 72280) + (72249 72249) + (72164 72164) + (72156 72159) + (72145 72147) + (72002 72002) + (72000 72000) + (71997 71997) + (71991 71992) + (71985 71989) + (71736 71736) + (71724 71726) + (71462 71462) + (71350 71350) + (71342 71343) + (71340 71340) + (71230 71230) + (71227 71228) + (71216 71218) + (71102 71102) + (71096 71099) + (71088 71089) + (70849 70849) + (70846 70846) + (70843 70844) + (70841 70841) + (70833 70834) + (70725 70725) + (70720 70721) + (70709 70711) + (70498 70499) + (70475 70477) + (70471 70472) + (70465 70468) + (70463 70463) + (70402 70403) + (70368 70370) + (70197 70197) + (70194 70195) + (70188 70190) + (70094 70094) + (70079 70080) + (70067 70069) + (70018 70018) + (69957 69958) + (69932 69932) + (69815 69816) + (69808 69810) + (69762 69762) + (69634 69634) + (69632 69632) + (44012 44012) + (44009 44010) + (44006 44007) + (44003 44004) + (43765 43765) + (43758 43759) + (43755 43755) + (43597 43597) + (43571 43572) + (43567 43568) + (43454 43456) + (43450 43451) + (43444 43445) + (43395 43395) + (43346 43347) + (43188 43203) + (43136 43137) + (43047 43047) + (43043 43044) + (7415 7415) + (7393 7393) + (7220 7221) + (7204 7211) + (7154 7155) + (7150 7150) + (7146 7148) + (7143 7143) + (7082 7082) + (7078 7079) + (7073 7073) + (7042 7042) + (6979 6980) + (6973 6977) + (6971 6971) + (6916 6916) + (6765 6770) + (6743 6743) + (6741 6741) + (6681 6682) + (6451 6456) + (6448 6449) + (6441 6443) + (6435 6438) + (6087 6088) + (6078 6085) + (6070 6070) + (5940 5940) + (5909 5909) + (4228 4228) + (4182 4183) + (4155 4156) + (4145 4145) + (3967 3967) + 
(3902 3903) + (3763 3763) + (3635 3635) + (3570 3571) + (3544 3550) + (3536 3537) + (3458 3459) + (3402 3404) + (3398 3400) + (3391 3392) + (3330 3331) + (3315 3315) + (3274 3275) + (3271 3272) + (3267 3268) + (3264 3265) + (3262 3262) + (3202 3203) + (3137 3140) + (3073 3075) + (3018 3020) + (3014 3016) + (3009 3010) + (3007 3007) + (2891 2892) + (2887 2888) + (2880 2880) + (2818 2819) + (2763 2764) + (2761 2761) + (2750 2752) + (2691 2691) + (2622 2624) + (2563 2563) + (2507 2508) + (2503 2504) + (2495 2496) + (2434 2435) + (2382 2383) + (2377 2380) + (2366 2368) + (2363 2363) + (2307 2307)) + (carriage-return (13 13)) + (extend + (917760 917999) + (917536 917631) + (127995 127999) + (125252 125258) + (125136 125142) + (124140 124143) + (123628 123631) + (123566 123566) + (123184 123190) + (123023 123023) + (122918 122922) + (122915 122916) + (122907 122913) + (122888 122904) + (122880 122886) + (121505 121519) + (121499 121503) + (121476 121476) + (121461 121461) + (121403 121452) + (121344 121398) + (119362 119364) + (119210 119213) + (119173 119179) + (119163 119170) + (119150 119154) + (119143 119145) + (119141 119141) + (118576 118598) + (118528 118573) + (113821 113822) + (94180 94180) + (94095 94098) + (94031 94031) + (92976 92982) + (92912 92916) + (78919 78933) + (78912 78912) + (73538 73538) + (73536 73536) + (73526 73530) + (73472 73473) + (73459 73460) + (73111 73111) + (73109 73109) + (73104 73105) + (73031 73031) + (73023 73029) + (73020 73021) + (73018 73018) + (73009 73014) + (72885 72886) + (72882 72883) + (72874 72880) + (72850 72871) + (72767 72767) + (72760 72765) + (72752 72758) + (72344 72345) + (72330 72342) + (72281 72283) + (72273 72278) + (72263 72263) + (72251 72254) + (72243 72248) + (72193 72202) + (72160 72160) + (72154 72155) + (72148 72151) + (72003 72003) + (71998 71998) + (71995 71996) + (71984 71984) + (71737 71738) + (71727 71735) + (71463 71467) + (71458 71461) + (71453 71455) + (71351 71351) + (71344 71349) + (71341 71341) + 
(71339 71339) + (71231 71232) + (71229 71229) + (71219 71226) + (71132 71133) + (71103 71104) + (71100 71101) + (71090 71093) + (71087 71087) + (70850 70851) + (70847 70848) + (70845 70845) + (70842 70842) + (70835 70840) + (70832 70832) + (70750 70750) + (70726 70726) + (70722 70724) + (70712 70719) + (70512 70516) + (70502 70508) + (70487 70487) + (70464 70464) + (70462 70462) + (70459 70460) + (70400 70401) + (70371 70378) + (70367 70367) + (70209 70209) + (70206 70206) + (70198 70199) + (70196 70196) + (70191 70193) + (70095 70095) + (70089 70092) + (70070 70078) + (70016 70017) + (70003 70003) + (69933 69940) + (69927 69931) + (69888 69890) + (69826 69826) + (69817 69818) + (69811 69814) + (69759 69761) + (69747 69748) + (69744 69744) + (69688 69702) + (69633 69633) + (69506 69509) + (69446 69456) + (69373 69375) + (69291 69292) + (68900 68903) + (68325 68326) + (68159 68159) + (68152 68154) + (68108 68111) + (68101 68102) + (68097 68099) + (66422 66426) + (66272 66272) + (66045 66045) + (65438 65439) + (65056 65071) + (65024 65039) + (64286 64286) + (44013 44013) + (44008 44008) + (44005 44005) + (43766 43766) + (43756 43757) + (43713 43713) + (43710 43711) + (43703 43704) + (43698 43700) + (43696 43696) + (43644 43644) + (43596 43596) + (43587 43587) + (43573 43574) + (43569 43570) + (43561 43566) + (43493 43493) + (43452 43453) + (43446 43449) + (43443 43443) + (43392 43394) + (43335 43345) + (43302 43309) + (43263 43263) + (43232 43249) + (43204 43205) + (43052 43052) + (43045 43046) + (43019 43019) + (43014 43014) + (43010 43010) + (42736 42737) + (42654 42655) + (42612 42621) + (42608 42610) + (42607 42607) + (12441 12442) + (12334 12335) + (12330 12333) + (11744 11775) + (11647 11647) + (11503 11505) + (8421 8432) + (8418 8420) + (8417 8417) + (8413 8416) + (8400 8412) + (8204 8204) + (7616 7679) + (7416 7417) + (7412 7412) + (7405 7405) + (7394 7400) + (7380 7392) + (7376 7378) + (7222 7223) + (7212 7219) + (7151 7153) + (7149 7149) + (7144 7145) + 
(7142 7142) + (7083 7085) + (7080 7081) + (7074 7077) + (7040 7041) + (7019 7027) + (6978 6978) + (6972 6972) + (6966 6970) + (6965 6965) + (6964 6964) + (6912 6915) + (6847 6862) + (6846 6846) + (6832 6845) + (6783 6783) + (6771 6780) + (6757 6764) + (6754 6754) + (6752 6752) + (6744 6750) + (6742 6742) + (6683 6683) + (6679 6680) + (6457 6459) + (6450 6450) + (6439 6440) + (6432 6434) + (6313 6313) + (6277 6278) + (6159 6159) + (6155 6157) + (6109 6109) + (6089 6099) + (6086 6086) + (6071 6077) + (6068 6069) + (6002 6003) + (5970 5971) + (5938 5939) + (5906 5908) + (4957 4959) + (4253 4253) + (4237 4237) + (4229 4230) + (4226 4226) + (4209 4212) + (4190 4192) + (4184 4185) + (4157 4158) + (4153 4154) + (4146 4151) + (4141 4144) + (4038 4038) + (3993 4028) + (3981 3991) + (3974 3975) + (3968 3972) + (3953 3966) + (3897 3897) + (3895 3895) + (3893 3893) + (3864 3865) + (3784 3790) + (3764 3772) + (3761 3761) + (3655 3662) + (3636 3642) + (3633 3633) + (3551 3551) + (3542 3542) + (3538 3540) + (3535 3535) + (3530 3530) + (3457 3457) + (3426 3427) + (3415 3415) + (3405 3405) + (3393 3396) + (3390 3390) + (3387 3388) + (3328 3329) + (3298 3299) + (3285 3286) + (3276 3277) + (3270 3270) + (3266 3266) + (3263 3263) + (3260 3260) + (3201 3201) + (3170 3171) + (3157 3158) + (3146 3149) + (3142 3144) + (3134 3136) + (3132 3132) + (3076 3076) + (3072 3072) + (3031 3031) + (3021 3021) + (3008 3008) + (3006 3006) + (2946 2946) + (2914 2915) + (2903 2903) + (2901 2902) + (2893 2893) + (2881 2884) + (2879 2879) + (2878 2878) + (2876 2876) + (2817 2817) + (2810 2815) + (2786 2787) + (2765 2765) + (2759 2760) + (2753 2757) + (2748 2748) + (2689 2690) + (2677 2677) + (2672 2673) + (2641 2641) + (2635 2637) + (2631 2632) + (2625 2626) + (2620 2620) + (2561 2562) + (2558 2558) + (2530 2531) + (2519 2519) + (2509 2509) + (2497 2500) + (2494 2494) + (2492 2492) + (2433 2433) + (2402 2403) + (2385 2391) + (2381 2381) + (2369 2376) + (2364 2364) + (2362 2362) + (2275 2306) + (2250 2273) 
+ (2200 2207) + (2137 2139) + (2089 2093) + (2085 2087) + (2075 2083) + (2070 2073) + (2045 2045) + (2027 2035) + (1958 1968) + (1840 1866) + (1809 1809) + (1770 1773) + (1767 1768) + (1759 1764) + (1750 1756) + (1648 1648) + (1611 1631) + (1552 1562) + (1479 1479) + (1476 1477) + (1473 1474) + (1471 1471) + (1425 1469) + (1160 1161) + (1155 1159) + (768 879)) + (line-feed (10 10)) + (zerowidth-joiner (8205 8205)) + (hangul-syllable-t (55243 55291) (4520 4607)) + (control + (918000 921599) + (917632 917759) + (917506 917535) + (917505 917505) + (917504 917504) + (119155 119162) + (113824 113827) + (78896 78911) + (65529 65531) + (65520 65528) + (65279 65279) + (8294 8303) + (8293 8293) + (8288 8292) + (8234 8238) + (8233 8233) + (8232 8232) + (8206 8207) + (8203 8203) + (6158 6158) + (1564 1564) + (173 173) + (127 159) + (14 31) + (11 12) + (0 9))))) + +(ranges->charset! + grapheme-ht + 'hangul-syllable-l + char-set:grapheme-hangul-syllable-l) +(ranges->charset! + grapheme-ht + 'hangul-syllable-v + char-set:grapheme-hangul-syllable-v) +(ranges->charset! + grapheme-ht + 'hangul-syllable-lv + char-set:grapheme-hangul-syllable-lv) +(ranges->charset! + grapheme-ht + 'hangul-syllable-lvt + char-set:grapheme-hangul-syllable-lvt) +(ranges->charset! + grapheme-ht + 'prepend + char-set:grapheme-prepend) +(ranges->charset! + grapheme-ht + 'carriage-return + char-set:grapheme-carriage-return) +(ranges->charset! + grapheme-ht + 'line-feed + char-set:grapheme-line-feed) +(ranges->charset! + grapheme-ht + 'control + char-set:grapheme-control) +(ranges->charset! + grapheme-ht + 'extend + char-set:grapheme-extend) +(ranges->charset! + grapheme-ht + 'regional-indicator + char-set:grapheme-regional-indicator) +(ranges->charset! + grapheme-ht + 'spacing-mark + char-set:grapheme-spacing-mark) +(ranges->charset! 
+ grapheme-ht + 'zerowidth-joiner + char-set:grapheme-zerowidth-joiner) diff --git a/runewidth/internal.scm b/runewidth/internal.scm index 863d455..fffbd6f 100644 --- a/runewidth/internal.scm +++ b/runewidth/internal.scm @@ -6,17 +6,20 @@ #:use-module (web uri) #:use-module (web client) #:use-module (web request) + #:use-module (srfi srfi-1) #:use-module (srfi srfi-71) #:export (@hex @codepoint @codepoint-range @comment @ws + cons-hash-list! hex-string->integer format-exception-msg in-surrogate-range wget-to-lines - file-to-lines)) + file-to-lines + ranges->charset!)) (define-peg-pattern @hex body (peg "[a-fA-F0-9]")) @@ -35,6 +38,26 @@ (define-peg-pattern @ws none (or " " "\t")) +(define-syntax-rule (cons-hash-list! ht key low high) + (let* ((old (hashq-ref ht key)) + (value (list low high)) + (new-lst + (if old + (cons value old) + (list value)))) + (hashq-set! ht key new-lst))) + +(define-syntax-rule (ranges->charset! ht name symbol) + (let* ((pairs (hashq-ref ht name))) + (for-each + (λ (pair) + (ucs-range->char-set! + (first pair) + ;; Exclusive upper range, so add one + (+ (second pair) 1) + #t symbol)) + pairs))) + (define (hex-string->integer str) ;; XXX: We would ideally do integer->char here and save it to file as such ;; However read-expr* does not actually work for all the characters! diff --git a/scripts/generate-east-asian.in b/scripts/generate-east-asian.in index aba5aff..324edd1 100644 --- a/scripts/generate-east-asian.in +++ b/scripts/generate-east-asian.in @@ -28,17 +28,18 @@ (define-peg-pattern @ea-line body (and @ea-datum (* @ws) @comment)) -(define ea-chars-ht (make-hash-table 6)) +(define eastasian-ht (make-hash-table 6)) (define (process-east-asian-line line) - (define (cons-ht! key low high) - (let* ((old (hashq-ref ea-chars-ht key)) - (value (list low high)) - (new-lst - (if old - (cons value old) - (list value)))) - (hashq-set! 
ea-chars-ht key new-lst))) + (define (string->property str comment) + (if (string-contains comment "COMBINING") + 'combining + (match str + ((or "W" "F") 'doublewidth) + ("H" 'halfwidth) + ("Na" 'narrow) + ("N" 'neutral) + ("A" 'ambiguous)))) (define tree (peg:tree (match-pattern @ea-line line))) @@ -49,7 +50,7 @@ (match tree (((('@codepoint-range ('@codepoint codepoints) ...) - ('@ea-width-prop width-prop)) + ('@ea-width-prop prop-str)) ('@comment comment)) (with-exception-handler @@ -58,25 +59,14 @@ (format-exception-msg stdout err)) (λ () (let ((f (hex-string->integer (first codepoints))) - (l (hex-string->integer (last codepoints)))) + (l (hex-string->integer (last codepoints))) + (width-prop (string->property prop-str comment))) (when (or (in-surrogate-range f) (in-surrogate-range l)) (error (format #f "chars in surrogate range ~x -> ~x" f l))) - (if (string-contains comment "COMBINING") - (cons-ht! 'combining f l) - (match width-prop - ((or "W" "F") - (cons-ht! 'doublewidth f l)) - ("H" - (cons-ht! 'halfwidth f l)) - ("Na" - (cons-ht! 'narrow f l)) - ("N" - (cons-ht! 'neutral f l)) - ("A" - (cons-ht! 'ambiguous f l)))))) + (cons-hash-list! eastasian-ht width-prop f l))) #:unwind? #t))))) (define ea-sets @@ -113,7 +103,7 @@ (with-output-to-file file (λ () - (display ";; Code generated by script/generate. DO NOT EDIT\n\n") + (format #t ";; Code generated by ~a. DO NOT EDIT\n\n" (first (command-line))) (for-each process-east-asian-line (line-func)) @@ -121,26 +111,13 @@ `(define-module (runewidth eastasian) #:use-module (ice-9 hash-table) #:use-module (srfi srfi-1) + #:use-module (runewidth internal) #:export ,ea-symbol-names)) (pretty-print - `(define chars-ht - (alist->hashq-table ',(hash-map->list cons ea-chars-ht)))) - - (display "\n") - - (pretty-print - `(define-syntax-rule (ranges->charset! name symbol) - (let* ((pairs (hashq-ref chars-ht name))) - (for-each - (λ (pair) - (ucs-range->char-set! 
- (first pair) - ;; Exclusive upper range, so add one - (+ (second pair) 1) - #t symbol)) - pairs)))) + `(define eastasian-ht + (alist->hashq-table ',(hash-map->list cons eastasian-ht)))) (display "\n") @@ -156,10 +133,11 @@ (λ (set-pair) (let ((name (first set-pair)) (symbol (second set-pair))) - (pretty-print - `(ranges->charset! ',name ,symbol)))) + (pretty-print + `(ranges->charset! eastasian-ht ',name ,symbol)))) ea-sets-and-symbols) (display "Code generation complete.\n" stdout))) (format stdout "Written to ~a.\n" file) + diff --git a/scripts/generate-emoji.in b/scripts/generate-emoji.in index 6f2acda..059bf63 100644 --- a/scripts/generate-emoji.in +++ b/scripts/generate-emoji.in @@ -32,10 +32,6 @@ (define (process-emoji-line line) (define tree (peg:tree (match-pattern @emoji-line line))) - (define (in-surrogate-range num) - (and (>= num #xd800) - (<= num #xdfff))) - (unless (or (not tree) (null? tree) (eq? '@comment (car tree))) @@ -76,7 +72,7 @@ (with-output-to-file file (λ () - (display ";; Code generated by script/generate. DO NOT EDIT\n\n") + (format #t ";; Code generated by ~a. DO NOT EDIT\n\n" (first (command-line))) (for-each process-emoji-line (line-func)) diff --git a/scripts/generate-graphemes.in b/scripts/generate-graphemes.in new file mode 100644 index 0000000..d42f72e --- /dev/null +++ b/scripts/generate-graphemes.in @@ -0,0 +1,145 @@ +#!@GUILE@ --no-auto-compile +-*- scheme -*- +!# + +;; Can be called with a trailing argument pointing to the file on disk. 
+
+(use-modules
+  (runewidth internal)
+  (ice-9 pretty-print)
+  (ice-9 peg)
+  (ice-9 format)
+  (ice-9 exceptions)
+  (ice-9 match)
+  (ice-9 hash-table)
+  (srfi srfi-1))
+
+;; Progress and diagnostics are written here.  The port is captured up
+;; front because `with-output-to-file' below redirects the current output
+;; port to the generated file.
+(define stdout (current-output-port))
+
+;; Where the data is fetched from when no local file is given on the
+;; command line.
+(define grapheme-url
+  "https://www.unicode.org/Public/15.0.0/ucd/auxiliary/GraphemeBreakProperty.txt")
+
+;; The property name that follows the ";" on a data line.
+(define-peg-pattern @grapheme-category all
+  (* (peg "[a-zA-Z_]")))
+
+;; "<codepoint-range> ; <category>"
+(define-peg-pattern @grapheme-datum body
+  (and @codepoint-range (* @ws) (ignore ";") (* @ws) @grapheme-category))
+
+;; A datum followed by its trailing comment.
+(define-peg-pattern @grapheme-line body
+  (and @grapheme-datum (* @ws) @comment))
+
+;; Maps a category symbol to a list of (low high) code point ranges.
+;; Both ends are inclusive; `ranges->charset!' adds one to the upper end.
+(define grapheme-ht (make-hash-table 13))
+
+;; Every category that `string->category' can produce.  One exported
+;; char-set is generated per entry, so a category missing from this list
+;; would be collected into `grapheme-ht' but never exposed as a char-set
+;; (this was previously the case for hangul-syllable-t).
+(define grapheme-sets
+  '(hangul-syllable-l
+    hangul-syllable-v
+    hangul-syllable-t
+    hangul-syllable-lv
+    hangul-syllable-lvt
+    prepend
+    carriage-return
+    line-feed
+    control
+    extend
+    regional-indicator
+    spacing-mark
+    zerowidth-joiner))
+
+;; Exported variable name for each category,
+;; e.g. extend -> char-set:grapheme-extend.
+(define grapheme-symbol-names
+  (map
+    (λ (set)
+      (string->symbol
+        (string-concatenate
+          (list "char-set:grapheme-"
+                (symbol->string set)))))
+    grapheme-sets))
+
+;; List of (category exported-symbol) pairs.
+(define grapheme-sets-and-symbols
+  (zip grapheme-sets grapheme-symbol-names))
+
+;; Parse one line of the data file and record its code point range in
+;; `grapheme-ht' under the line's category.  Lines that do not parse, are
+;; empty, or are pure comments are ignored.  A line that raises an error
+;; (unknown category, surrogate range, ...) is reported on `stdout' and
+;; skipped.
+(define (process-grapheme-line line)
+  ;; Translate the property name used in the data file into the symbol
+  ;; used as a key in `grapheme-ht'.  An unknown name makes `match' raise,
+  ;; which the handler below turns into a skipped line.
+  (define (string->category str)
+    (match str
+      ("L" 'hangul-syllable-l)
+      ("V" 'hangul-syllable-v)
+      ("T" 'hangul-syllable-t)
+      ("LV" 'hangul-syllable-lv)
+      ("LVT" 'hangul-syllable-lvt)
+      ("Prepend" 'prepend)
+      ("CR" 'carriage-return)
+      ("LF" 'line-feed)
+      ("Control" 'control)
+      ("Extend" 'extend)
+      ("Regional_Indicator" 'regional-indicator)
+      ("SpacingMark" 'spacing-mark)
+      ("ZWJ" 'zerowidth-joiner)))
+
+  (define tree (peg:tree (match-pattern @grapheme-line line)))
+
+  (unless (or (not tree)
+              (null? tree)
+              (eq? '@comment (car tree)))
+
+    (match tree
+      (((('@codepoint-range
+          ('@codepoint codepoints) ...)
+         ('@grapheme-category cat-str))
+        ('@comment comment))
+
+       (with-exception-handler
+         (λ (err)
+           (format stdout "Skipping line due to error :: ")
+           (format-exception-msg stdout err))
+         (λ ()
+           ;; A single code point yields a one-element list, in which
+           ;; case the first and last element are the same.
+           (let ((f (hex-string->integer (first codepoints)))
+                 (l (hex-string->integer (last codepoints)))
+                 (category (string->category cat-str)))
+
+             (when (or (in-surrogate-range f)
+                       (in-surrogate-range l))
+               (error (format #f "chars in surrogate range ~x -> ~x" f l)))
+
+             (cons-hash-list! grapheme-ht category f l)))
+         ;; Unwind so that the failing line is abandoned and processing
+         ;; continues with the next one.
+         #:unwind? #t)))))
+
+;; Thunk returning the input lines: read from the file named by the single
+;; command line argument if there is exactly one, otherwise download
+;; `grapheme-url'.
+(define line-func
+  (if (= 2 (length (command-line)))
+    (λ ()
+      (file-to-lines (last (command-line)) stdout))
+    (λ ()
+      (wget-to-lines grapheme-url stdout))))
+
+;; Path of the generated module, relative to the current directory.
+(define file "runewidth/graphemes.scm")
+
+(format stdout "Writing to ~a...\n" file)
+
+(with-output-to-file file
+  (λ ()
+    (format #t ";; Code generated by ~a. DO NOT EDIT\n\n" (first (command-line)))
+
+    ;; Fill `grapheme-ht' before anything that depends on it is printed.
+    (for-each process-grapheme-line (line-func))
+
+    (pretty-print
+      `(define-module (runewidth graphemes)
+         #:use-module (ice-9 hash-table)
+         #:use-module (srfi srfi-1)
+         #:use-module (runewidth internal)
+         #:export ,grapheme-symbol-names))
+
+    (pretty-print
+      `(define grapheme-ht
+         (alist->hashq-table ',(hash-map->list cons grapheme-ht))))
+
+    (display "\n")
+
+    ;; Every exported char-set has to be bound before `ranges->charset!'
+    ;; can add ranges to it; without these definitions the generated
+    ;; module refers to unbound variables.  Each one starts out as a
+    ;; fresh, empty char-set.
+    (for-each
+      (λ (symbol)
+        (pretty-print
+          `(define ,symbol (char-set))))
+      grapheme-symbol-names)
+
+    (display "\n")
+
+    (for-each
+      (λ (set-pair)
+        (let ((name (first set-pair))
+              (symbol (second set-pair)))
+          ;; `ranges->charset!' iterates over the ranges stored under
+          ;; NAME, so only emit the call when some were collected.  A
+          ;; category without data keeps its empty char-set.
+          (when (hashq-ref grapheme-ht name)
+            (pretty-print
+              `(ranges->charset! grapheme-ht ',name ,symbol)))))
+      grapheme-sets-and-symbols)
+
+    (display "Code generation complete.\n" stdout)))
+
+(format stdout "Written to ~a.\n" file)