From 1c0c4634f9e78b43468dc4809effa9c2210c14a2 Mon Sep 17 00:00:00 2001 From: Vivianne Langdon Date: Mon, 4 Mar 2024 17:21:19 -0500 Subject: [PATCH] Renaming vars [need to sort the output of hash table...] --- scripts/generate-eastasian.in | 4 +- scripts/generate-emoji.in | 4 +- scripts/generate-graphemes.in | 4 +- uniseg/eastasian.scm | 230 ++++----- uniseg/emoji.scm | 868 +++++++++++++++++----------------- uniseg/graphemes.scm | 842 ++++++++++++++++----------------- uniseg/internal.scm | 4 +- 7 files changed, 978 insertions(+), 978 deletions(-) diff --git a/scripts/generate-eastasian.in b/scripts/generate-eastasian.in index b2cc041..13027ef 100644 --- a/scripts/generate-eastasian.in +++ b/scripts/generate-eastasian.in @@ -60,7 +60,7 @@ #:export (,@eastasian-symbols eastasian-charsets))) - (define-values (process-line output-boilerplate) + (define-values (process-line print-to-file) (make-line-processor eastasian-ht string->property @@ -70,7 +70,7 @@ stdout)) (for-each process-line (cmdline-wget-or-file url stdout)) - (output-boilerplate) + (print-to-file) (display "Code generation complete.\n" stdout))) diff --git a/scripts/generate-emoji.in b/scripts/generate-emoji.in index 64ccd29..31d90f9 100644 --- a/scripts/generate-emoji.in +++ b/scripts/generate-emoji.in @@ -56,7 +56,7 @@ #:export (,@emoji-symbols emoji-charsets))) - (define-values (process-line output-boilerplate) + (define-values (process-line print-to-file) (make-line-processor emoji-ht string->property @@ -66,7 +66,7 @@ stdout)) (for-each process-line (cmdline-wget-or-file url stdout)) - (output-boilerplate) + (print-to-file) (display "Code generation complete.\n" stdout))) diff --git a/scripts/generate-graphemes.in b/scripts/generate-graphemes.in index 35520a7..fd256ff 100644 --- a/scripts/generate-graphemes.in +++ b/scripts/generate-graphemes.in @@ -73,7 +73,7 @@ #:export (,@grapheme-symbols grapheme-charsets))) - (define-values (process-line output-boilerplate) + (define-values (process-line print-to-file) (make-line-processor grapheme-ht string->property @@ -83,7 +83,7 @@ stdout)) (for-each process-line (cmdline-wget-or-file url stdout)) - (output-boilerplate) + (print-to-file) ;; Need emoji in the set as well. (pretty-print diff --git a/uniseg/eastasian.scm b/uniseg/eastasian.scm index f14d008..26d7241 100644 --- a/uniseg/eastasian.scm +++ b/uniseg/eastasian.scm @@ -18,7 +18,121 @@ eastasian-charsets)) (define hashtable (alist->hashq-table - '((ambiguous + '((combining + (125136 125142) + (122918 122922) + (122915 122916) + (122907 122913) + (122888 122904) + (122880 122886) + (119362 119364) + (119210 119213) + (119173 119179) + (119163 119170) + (119149 119154) + (119143 119145) + (119141 119142) + (92912 92916) + (70512 70516) + (70502 70508) + (70459 70460) + (70400 70401) + (69446 69456) + (69291 69292) + (66422 66426) + (66045 66045) + (65056 65071) + (43232 43249) + (42736 42737) + (42654 42655) + (42612 42621) + (42608 42610) + (42607 42607) + (12441 12442) + (11744 11775) + (11503 11505) + (8421 8432) + (8418 8420) + (8417 8417) + (8413 8416) + (8400 8412) + (7675 7679) + (7616 7673) + (7019 7027) + (6847 6848) + (6846 6846) + (6832 6845) + (6783 6783) + (4957 4959) + (3328 3329) + (3076 3076) + (3072 3072) + (2027 2035) + (1160 1161) + (1155 1159) + (768 879)) + (halfwidth + (65517 65518) + (65513 65516) + (65512 65512) + (65498 65500) + (65490 65495) + (65482 65487) + (65474 65479) + (65440 65470) + (65438 65439) + (65393 65437) + (65392 65392) + (65382 65391) + (65380 65381) + (65379 65379) + (65378 65378) + (65377 65377) + (8361 8361)) + (narrow + (10630 10630) + (10629 10629) + (10221 10221) + (10220 10220) + (10219 10219) + (10218 10218) + (10217 10217) + (10216 10216) + (10215 10215) + (10214 10214) + (175 175) + (172 172) + (166 166) + (165 165) + (162 163) + (126 126) + (125 125) + (124 124) + (123 123) + (97 122) + (96 96) + (95 95) + (94 94) + (93 93) + (92 92) + (91 91) + (65 90) + (63 64) + (60 62) + (58 59) + (48 57) + (46 47) + (45 45) + (44 44) + (43 43) + (42 42) + (41 41) + (40 40) + (37 39) + (36 36) + (33 35) + (32 32)) + (ambiguous (1048576 1114109) (983040 1048573) (917760 917999) @@ -216,67 +330,6 @@ (167 167) (164 164) (161 161)) - (halfwidth - (65517 65518) - (65513 65516) - (65512 65512) - (65498 65500) - (65490 65495) - (65482 65487) - (65474 65479) - (65440 65470) - (65438 65439) - (65393 65437) - (65392 65392) - (65382 65391) - (65380 65381) - (65379 65379) - (65378 65378) - (65377 65377) - (8361 8361)) - (narrow - (10630 10630) - (10629 10629) - (10221 10221) - (10220 10220) - (10219 10219) - (10218 10218) - (10217 10217) - (10216 10216) - (10215 10215) - (10214 10214) - (175 175) - (172 172) - (166 166) - (165 165) - (162 163) - (126 126) - (125 125) - (124 124) - (123 123) - (97 122) - (96 96) - (95 95) - (94 94) - (93 93) - (92 92) - (91 91) - (65 90) - (63 64) - (60 62) - (58 59) - (48 57) - (46 47) - (45 45) - (44 44) - (43 43) - (42 42) - (41 41) - (40 40) - (37 39) - (36 36) - (33 35) - (32 32)) (neutral (917536 917631) (917505 917505) @@ -2169,59 +2222,6 @@ (128 159) (127 127) (0 31)) - (combining - (125136 125142) - (122918 122922) - (122915 122916) - (122907 122913) - (122888 122904) - (122880 122886) - (119362 119364) - (119210 119213) - (119173 119179) - (119163 119170) - (119149 119154) - (119143 119145) - (119141 119142) - (92912 92916) - (70512 70516) - (70502 70508) - (70459 70460) - (70400 70401) - (69446 69456) - (69291 69292) - (66422 66426) - (66045 66045) - (65056 65071) - (43232 43249) - (42736 42737) - (42654 42655) - (42612 42621) - (42608 42610) - (42607 42607) - (12441 12442) - (11744 11775) - (11503 11505) - (8421 8432) - (8418 8420) - (8417 8417) - (8413 8416) - (8400 8412) - (7675 7679) - (7616 7673) - (7019 7027) - (6847 6848) - (6846 6846) - (6832 6845) - (6783 6783) - (4957 4959) - (3328 3329) - (3076 3076) - (3072 3072) - (2027 2035) - (1160 1161) - (1155 1159) - (768 879)) (doublewidth (201547 262141) (196608 201546) diff --git a/uniseg/emoji.scm b/uniseg/emoji.scm index 6d33c62..f3c0396 100644 --- a/uniseg/emoji.scm +++ b/uniseg/emoji.scm @@ -18,7 +18,439 @@ emoji-charsets)) (define hashtable (alist->hashq-table - '((emoji-extended-pictographic + '((emoji-modifier-base + (129489 129501) + (129485 129487) + (129467 129467) + (129464 129465) + (129461 129462) + (129399 129399) + (129340 129342) + (129331 129337) + (129329 129330) + (129328 129328) + (129318 129318) + (129311 129311) + (129305 129310) + (129304 129304) + (129295 129295) + (129292 129292) + (128716 128716) + (128704 128704) + (128694 128694) + (128692 128693) + (128675 128675) + (128587 128591) + (128581 128583) + (128405 128406) + (128400 128400) + (128378 128378) + (128372 128373) + (128170 128170) + (128145 128145) + (128143 128143) + (128133 128135) + (128129 128131) + (128124 128124) + (128110 128120) + (128108 128109) + (128102 128107) + (128070 128080) + (128066 128067) + (127947 127948) + (127946 127946) + (127943 127943) + (127938 127940) + (127877 127877) + (9997 9997) + (9994 9996) + (9977 9977) + (9757 9757)) + (emoji (129744 129750) + (129728 129730) + (129712 129718) + (129686 129704) + (129680 129685) + (129667 129670) + (129664 129666) + (129656 129658) + (129652 129652) + (129648 129651) + (129511 129535) + (129488 129510) + (129485 129487) + (129483 129483) + (129475 129482) + (129473 129474) + (129472 129472) + (129466 129471) + (129456 129465) + (129454 129455) + (129451 129453) + (129445 129450) + (129443 129444) + (129432 129442) + (129426 129431) + (129413 129425) + (129408 129412) + (129404 129407) + (129403 129403) + (129402 129402) + (129399 129400) + (129395 129398) + (129394 129394) + (129393 129393) + (129388 129392) + (129375 129387) + (129360 129374) + (129357 129359) + (129356 129356) + (129351 129355) + (129344 129349) + (129343 129343) + (129340 129342) + (129331 129338) + (129329 129330) + (129328 129328) + (129320 129327) + (129312 129319) + (129311 129311) + (129305 129310) + (129296 129304) + (129293 129295) + (129292 129292) + (128992 129003) + (128763 128764) + (128762 128762) + (128761 128761) + (128759 128760) + (128756 128758) + (128755 128755) + (128752 128752) + (128747 128748) + (128745 128745) + (128736 128741) + (128726 128727) + (128725 128725) + (128721 128722) + (128720 128720) + (128717 128719) + (128716 128716) + (128715 128715) + (128705 128709) + (128704 128704) + (128703 128703) + (128697 128702) + (128695 128696) + (128694 128694) + (128691 128693) + (128690 128690) + (128686 128689) + (128679 128685) + (128678 128678) + (128676 128677) + (128675 128675) + (128674 128674) + (128667 128673) + (128665 128666) + (128664 128664) + (128663 128663) + (128662 128662) + (128661 128661) + (128660 128660) + (128657 128659) + (128656 128656) + (128655 128655) + (128654 128654) + (128653 128653) + (128652 128652) + (128650 128651) + (128649 128649) + (128648 128648) + (128647 128647) + (128646 128646) + (128643 128645) + (128641 128642) + (128640 128640) + (128581 128591) + (128577 128580) + (128567 128576) + (128566 128566) + (128565 128565) + (128564 128564) + (128560 128563) + (128558 128559) + (128557 128557) + (128556 128556) + (128552 128555) + (128550 128551) + (128544 128549) + (128543 128543) + (128540 128542) + (128539 128539) + (128538 128538) + (128537 128537) + (128536 128536) + (128535 128535) + (128534 128534) + (128533 128533) + (128530 128532) + (128529 128529) + (128528 128528) + (128527 128527) + (128526 128526) + (128521 128525) + (128519 128520) + (128513 128518) + (128512 128512) + (128507 128511) + (128506 128506) + (128499 128499) + (128495 128495) + (128488 128488) + (128483 128483) + (128481 128481) + (128476 128478) + (128465 128467) + (128450 128452) + (128444 128444) + (128433 128434) + (128424 128424) + (128421 128421) + (128420 128420) + (128405 128406) + (128400 128400) + (128394 128397) + (128391 128391) + (128378 128378) + (128371 128377) + (128367 128368) + (128348 128359) + (128336 128347) + (128331 128334) + (128329 128330) + (128302 128317) + (128300 128301) + (128278 128299) + (128277 128277) + (128266 128276) + (128265 128265) + (128264 128264) + (128260 128263) + (128259 128259) + (128255 128258) + (128253 128253) + (128249 128252) + (128248 128248) + (128246 128247) + (128245 128245) + (128240 128244) + (128239 128239) + (128238 128238) + (128236 128237) + (128184 128235) + (128182 128183) + (128174 128181) + (128173 128173) + (128110 128172) + (128108 128109) + (128102 128107) + (128101 128101) + (128066 128100) + (128065 128065) + (128064 128064) + (128063 128063) + (128043 128062) + (128042 128042) + (128023 128041) + (128022 128022) + (128021 128021) + (128020 128020) + (128019 128019) + (128017 128018) + (128015 128016) + (128012 128014) + (128009 128011) + (128008 128008) + (127992 128007) + (127991 127991) + (127989 127989) + (127988 127988) + (127987 127987) + (127973 127984) + (127972 127972) + (127968 127971) + (127956 127967) + (127951 127955) + (127947 127950) + (127946 127946) + (127945 127945) + (127944 127944) + (127943 127943) + (127942 127942) + (127941 127941) + (127904 127940) + (127902 127903) + (127897 127899) + (127894 127895) + (127872 127891) + (127870 127871) + (127869 127869) + (127868 127868) + (127825 127867) + (127824 127824) + (127820 127823) + (127819 127819) + (127799 127818) + (127798 127798) + (127796 127797) + (127794 127795) + (127792 127793) + (127789 127791) + (127780 127788) + (127777 127777) + (127775 127776) + (127773 127774) + (127772 127772) + (127771 127771) + (127770 127770) + (127769 127769) + (127766 127768) + (127763 127765) + (127762 127762) + (127761 127761) + (127760 127760) + (127759 127759) + (127757 127758) + (127744 127756) + (127568 127569) + (127538 127546) + (127535 127535) + (127514 127514) + (127489 127490) + (127462 127487) + (127377 127386) + (127374 127374) + (127358 127359) + (127344 127345) + (127183 127183) + (126980 126980) + (12953 12953) + (12951 12951) + (12349 12349) + (12336 12336) + (11093 11093) + (11088 11088) + (11035 11036) + (11013 11015) + (10548 10549) + (10175 10175) + (10160 10160) + (10145 10145) + (10133 10135) + (10084 10084) + (10083 10083) + (10071 10071) + (10067 10069) + (10062 10062) + (10060 10060) + (10055 10055) + (10052 10052) + (10035 10036) + (10024 10024) + (10017 10017) + (10013 10013) + (10006 10006) + (10004 10004) + (10002 10002) + (9999 9999) + (9997 9997) + (9992 9996) + (9989 9989) + (9986 9986) + (9981 9981) + (9978 9978) + (9975 9977) + (9973 9973) + (9972 9972) + (9970 9971) + (9968 9969) + (9962 9962) + (9961 9961) + (9940 9940) + (9939 9939) + (9937 9937) + (9935 9935) + (9934 9934) + (9928 9928) + (9924 9925) + (9917 9918) + (9904 9905) + (9898 9899) + (9895 9895) + (9888 9889) + (9883 9884) + (9881 9881) + (9878 9879) + (9877 9877) + (9876 9876) + (9875 9875) + (9874 9874) + (9855 9855) + (9854 9854) + (9851 9851) + (9832 9832) + (9829 9830) + (9827 9827) + (9824 9824) + (9823 9823) + (9800 9811) + (9794 9794) + (9792 9792) + (9786 9786) + (9784 9785) + (9775 9775) + (9774 9774) + (9770 9770) + (9766 9766) + (9762 9763) + (9760 9760) + (9757 9757) + (9752 9752) + (9748 9749) + (9745 9745) + (9742 9742) + (9732 9732) + (9730 9731) + (9728 9729) + (9723 9726) + (9664 9664) + (9654 9654) + (9642 9643) + (9410 9410) + (9208 9210) + (9203 9203) + (9201 9202) + (9200 9200) + (9199 9199) + (9197 9198) + (9193 9196) + (9167 9167) + (9000 9000) + (8986 8987) + (8617 8618) + (8596 8601) + (8505 8505) + (8482 8482) + (8265 8265) + (8252 8252) + (174 174) + (169 169) + (48 57) + (42 42) + (35 35)) + (emoji-modifier (127995 127999)) + (emoji-extended-pictographic (130048 131069) (129751 129791) (129744 129750) @@ -782,439 +1214,7 @@ (8205 8205) (48 57) (42 42) - (35 35)) - (emoji-modifier-base - (129489 129501) - (129485 129487) - (129467 129467) - (129464 129465) - (129461 129462) - (129399 129399) - (129340 129342) - (129331 129337) - (129329 129330) - (129328 129328) - (129318 129318) - (129311 129311) - (129305 129310) - (129304 129304) - (129295 129295) - (129292 129292) - (128716 128716) - (128704 128704) - (128694 128694) - (128692 128693) - (128675 128675) - (128587 128591) - (128581 128583) - (128405 128406) - (128400 128400) - (128378 128378) - (128372 128373) - (128170 128170) - (128145 128145) - (128143 128143) - (128133 128135) - (128129 128131) - (128124 128124) - (128110 128120) - (128108 128109) - (128102 128107) - (128070 128080) - (128066 128067) - (127947 127948) - (127946 127946) - (127943 127943) - (127938 127940) - (127877 127877) - (9997 9997) - (9994 9996) - (9977 9977) - (9757 9757)) - (emoji (129744 129750) - (129728 129730) - (129712 129718) - (129686 129704) - (129680 129685) - (129667 129670) - (129664 129666) - (129656 129658) - (129652 129652) - (129648 129651) - (129511 129535) - (129488 129510) - (129485 129487) - (129483 129483) - (129475 129482) - (129473 129474) - (129472 129472) - (129466 129471) - (129456 129465) - (129454 129455) - (129451 129453) - (129445 129450) - (129443 129444) - (129432 129442) - (129426 129431) - (129413 129425) - (129408 129412) - (129404 129407) - (129403 129403) - (129402 129402) - (129399 129400) - (129395 129398) - (129394 129394) - (129393 129393) - (129388 129392) - (129375 129387) - (129360 129374) - (129357 129359) - (129356 129356) - (129351 129355) - (129344 129349) - (129343 129343) - (129340 129342) - (129331 129338) - (129329 129330) - (129328 129328) - (129320 129327) - (129312 129319) - (129311 129311) - (129305 129310) - (129296 129304) - (129293 129295) - (129292 129292) - (128992 129003) - (128763 128764) - (128762 128762) - (128761 128761) - (128759 128760) - (128756 128758) - (128755 128755) - (128752 128752) - (128747 128748) - (128745 128745) - (128736 128741) - (128726 128727) - (128725 128725) - (128721 128722) - (128720 128720) - (128717 128719) - (128716 128716) - (128715 128715) - (128705 128709) - (128704 128704) - (128703 128703) - (128697 128702) - (128695 128696) - (128694 128694) - (128691 128693) - (128690 128690) - (128686 128689) - (128679 128685) - (128678 128678) - (128676 128677) - (128675 128675) - (128674 128674) - (128667 128673) - (128665 128666) - (128664 128664) - (128663 128663) - (128662 128662) - (128661 128661) - (128660 128660) - (128657 128659) - (128656 128656) - (128655 128655) - (128654 128654) - (128653 128653) - (128652 128652) - (128650 128651) - (128649 128649) - (128648 128648) - (128647 128647) - (128646 128646) - (128643 128645) - (128641 128642) - (128640 128640) - (128581 128591) - (128577 128580) - (128567 128576) - (128566 128566) - (128565 128565) - (128564 128564) - (128560 128563) - (128558 128559) - (128557 128557) - (128556 128556) - (128552 128555) - (128550 128551) - (128544 128549) - (128543 128543) - (128540 128542) - (128539 128539) - (128538 128538) - (128537 128537) - (128536 128536) - (128535 128535) - (128534 128534) - (128533 128533) - (128530 128532) - (128529 128529) - (128528 128528) - (128527 128527) - (128526 128526) - (128521 128525) - (128519 128520) - (128513 128518) - (128512 128512) - (128507 128511) - (128506 128506) - (128499 128499) - (128495 128495) - (128488 128488) - (128483 128483) - (128481 128481) - (128476 128478) - (128465 128467) - (128450 128452) - (128444 128444) - (128433 128434) - (128424 128424) - (128421 128421) - (128420 128420) - (128405 128406) - (128400 128400) - (128394 128397) - (128391 128391) - (128378 128378) - (128371 128377) - (128367 128368) - (128348 128359) - (128336 128347) - (128331 128334) - (128329 128330) - (128302 128317) - (128300 128301) - (128278 128299) - (128277 128277) - (128266 128276) - (128265 128265) - (128264 128264) - (128260 128263) - (128259 128259) - (128255 128258) - (128253 128253) - (128249 128252) - (128248 128248) - (128246 128247) - (128245 128245) - (128240 128244) - (128239 128239) - (128238 128238) - (128236 128237) - (128184 128235) - (128182 128183) - (128174 128181) - (128173 128173) - (128110 128172) - (128108 128109) - (128102 128107) - (128101 128101) - (128066 128100) - (128065 128065) - (128064 128064) - (128063 128063) - (128043 128062) - (128042 128042) - (128023 128041) - (128022 128022) - (128021 128021) - (128020 128020) - (128019 128019) - (128017 128018) - (128015 128016) - (128012 128014) - (128009 128011) - (128008 128008) - (127992 128007) - (127991 127991) - (127989 127989) - (127988 127988) - (127987 127987) - (127973 127984) - (127972 127972) - (127968 127971) - (127956 127967) - (127951 127955) - (127947 127950) - (127946 127946) - (127945 127945) - (127944 127944) - (127943 127943) - (127942 127942) - (127941 127941) - (127904 127940) - (127902 127903) - (127897 127899) - (127894 127895) - (127872 127891) - (127870 127871) - (127869 127869) - (127868 127868) - (127825 127867) - (127824 127824) - (127820 127823) - (127819 127819) - (127799 127818) - (127798 127798) - (127796 127797) - (127794 127795) - (127792 127793) - (127789 127791) - (127780 127788) - (127777 127777) - (127775 127776) - (127773 127774) - (127772 127772) - (127771 127771) - (127770 127770) - (127769 127769) - (127766 127768) - (127763 127765) - (127762 127762) - (127761 127761) - (127760 127760) - (127759 127759) - (127757 127758) - (127744 127756) - (127568 127569) - (127538 127546) - (127535 127535) - (127514 127514) - (127489 127490) - (127462 127487) - (127377 127386) - (127374 127374) - (127358 127359) - (127344 127345) - (127183 127183) - (126980 126980) - (12953 12953) - (12951 12951) - (12349 12349) - (12336 12336) - (11093 11093) - (11088 11088) - (11035 11036) - (11013 11015) - (10548 10549) - (10175 10175) - (10160 10160) - (10145 10145) - (10133 10135) - (10084 10084) - (10083 10083) - (10071 10071) - (10067 10069) - (10062 10062) - (10060 10060) - (10055 10055) - (10052 10052) - (10035 10036) - (10024 10024) - (10017 10017) - (10013 10013) - (10006 10006) - (10004 10004) - (10002 10002) - (9999 9999) - (9997 9997) - (9992 9996) - (9989 9989) - (9986 9986) - (9981 9981) - (9978 9978) - (9975 9977) - (9973 9973) - (9972 9972) - (9970 9971) - (9968 9969) - (9962 9962) - (9961 9961) - (9940 9940) - (9939 9939) - (9937 9937) - (9935 9935) - (9934 9934) - (9928 9928) - (9924 9925) - (9917 9918) - (9904 9905) - (9898 9899) - (9895 9895) - (9888 9889) - (9883 9884) - (9881 9881) - (9878 9879) - (9877 9877) - (9876 9876) - (9875 9875) - (9874 9874) - (9855 9855) - (9854 9854) - (9851 9851) - (9832 9832) - (9829 9830) - (9827 9827) - (9824 9824) - (9823 9823) - (9800 9811) - (9794 9794) - (9792 9792) - (9786 9786) - (9784 9785) - (9775 9775) - (9774 9774) - (9770 9770) - (9766 9766) - (9762 9763) - (9760 9760) - (9757 9757) - (9752 9752) - (9748 9749) - (9745 9745) - (9742 9742) - (9732 9732) - (9730 9731) - (9728 9729) - (9723 9726) - (9664 9664) - (9654 9654) - (9642 9643) - (9410 9410) - (9208 9210) - (9203 9203) - (9201 9202) - (9200 9200) - (9199 9199) - (9197 9198) - (9193 9196) - (9167 9167) - (9000 9000) - (8986 8987) - (8617 8618) - (8596 8601) - (8505 8505) - (8482 8482) - (8265 8265) - (8252 8252) - (174 174) - (169 169) - (48 57) - (42 42) - (35 35)) - (emoji-modifier (127995 127999))))) + (35 35))))) (define char-set:emoji (char-set)) (define char-set:emoji-presentation (char-set)) diff --git a/uniseg/graphemes.scm b/uniseg/graphemes.scm index c38529f..4b79530 100644 --- a/uniseg/graphemes.scm +++ b/uniseg/graphemes.scm @@ -26,7 +26,23 @@ grapheme-charsets)) (define hashtable (alist->hashq-table - '((hangul-syllable-l (43360 43388) (4352 4447)) + '((carriage-return (13 13)) + (prepend + (73474 73474) + (73030 73030) + (72324 72329) + (72250 72250) + (72001 72001) + (71999 71999) + (70082 70083) + (69837 69837) + (69821 69821) + (3406 3406) + (2274 2274) + (2192 2193) + (1807 1807) + (1757 1757) + (1536 1541)) (control (918000 921599) (917632 917759) @@ -54,410 +70,7 @@ (14 31) (11 12) (0 9)) - (carriage-return (13 13)) - (hangul-syllable-v (55216 55238) (4448 4519)) (line-feed (10 10)) - (hangul-syllable-lvt - (55177 55203) - (55149 55175) - (55121 55147) - (55093 55119) - (55065 55091) - (55037 55063) - (55009 55035) - (54981 55007) - (54953 54979) - (54925 54951) - (54897 54923) - (54869 54895) - (54841 54867) - (54813 54839) - (54785 54811) - (54757 54783) - (54729 54755) - (54701 54727) - (54673 54699) - (54645 54671) - (54617 54643) - (54589 54615) - (54561 54587) - (54533 54559) - (54505 54531) - (54477 54503) - (54449 54475) - (54421 54447) - (54393 54419) - (54365 54391) - (54337 54363) - (54309 54335) - (54281 54307) - (54253 54279) - (54225 54251) - (54197 54223) - (54169 54195) - (54141 54167) - (54113 54139) - (54085 54111) - (54057 54083) - (54029 54055) - (54001 54027) - (53973 53999) - (53945 53971) - (53917 53943) - (53889 53915) - (53861 53887) - (53833 53859) - (53805 53831) - (53777 53803) - (53749 53775) - (53721 53747) - (53693 53719) - (53665 53691) - (53637 53663) - (53609 53635) - (53581 53607) - (53553 53579) - (53525 53551) - (53497 53523) - (53469 53495) - (53441 53467) - (53413 53439) - (53385 53411) - (53357 53383) - (53329 53355) - (53301 53327) - (53273 53299) - (53245 53271) - (53217 53243) - (53189 53215) - (53161 53187) - (53133 53159) - (53105 53131) - (53077 53103) - (53049 53075) - (53021 53047) - (52993 53019) - (52965 52991) - (52937 52963) - (52909 52935) - (52881 52907) - (52853 52879) - (52825 52851) - (52797 52823) - (52769 52795) - (52741 52767) - (52713 52739) - (52685 52711) - (52657 52683) - (52629 52655) - (52601 52627) - (52573 52599) - (52545 52571) - (52517 52543) - (52489 52515) - (52461 52487) - (52433 52459) - (52405 52431) - (52377 52403) - (52349 52375) - (52321 52347) - (52293 52319) - (52265 52291) - (52237 52263) - (52209 52235) - (52181 52207) - (52153 52179) - (52125 52151) - (52097 52123) - (52069 52095) - (52041 52067) - (52013 52039) - (51985 52011) - (51957 51983) - (51929 51955) - (51901 51927) - (51873 51899) - (51845 51871) - (51817 51843) - (51789 51815) - (51761 51787) - (51733 51759) - (51705 51731) - (51677 51703) - (51649 51675) - (51621 51647) - (51593 51619) - (51565 51591) - (51537 51563) - (51509 51535) - (51481 51507) - (51453 51479) - (51425 51451) - (51397 51423) - (51369 51395) - (51341 51367) - (51313 51339) - (51285 51311) - (51257 51283) - (51229 51255) - (51201 51227) - (51173 51199) - (51145 51171) - (51117 51143) - (51089 51115) - (51061 51087) - (51033 51059) - (51005 51031) - (50977 51003) - (50949 50975) - (50921 50947) - (50893 50919) - (50865 50891) - (50837 50863) - (50809 50835) - (50781 50807) - (50753 50779) - (50725 50751) - (50697 50723) - (50669 50695) - (50641 50667) - (50613 50639) - (50585 50611) - (50557 50583) - (50529 50555) - (50501 50527) - (50473 50499) - (50445 50471) - (50417 50443) - (50389 50415) - (50361 50387) - (50333 50359) - (50305 50331) - (50277 50303) - (50249 50275) - (50221 50247) - (50193 50219) - (50165 50191) - (50137 50163) - (50109 50135) - (50081 50107) - (50053 50079) - (50025 50051) - (49997 50023) - (49969 49995) - (49941 49967) - (49913 49939) - (49885 49911) - (49857 49883) - (49829 49855) - (49801 49827) - (49773 49799) - (49745 49771) - (49717 49743) - (49689 49715) - (49661 49687) - (49633 49659) - (49605 49631) - (49577 49603) - (49549 49575) - (49521 49547) - (49493 49519) - (49465 49491) - (49437 49463) - (49409 49435) - (49381 49407) - (49353 49379) - (49325 49351) - (49297 49323) - (49269 49295) - (49241 49267) - (49213 49239) - (49185 49211) - (49157 49183) - (49129 49155) - (49101 49127) - (49073 49099) - (49045 49071) - (49017 49043) - (48989 49015) - (48961 48987) - (48933 48959) - (48905 48931) - (48877 48903) - (48849 48875) - (48821 48847) - (48793 48819) - (48765 48791) - (48737 48763) - (48709 48735) - (48681 48707) - (48653 48679) - (48625 48651) - (48597 48623) - (48569 48595) - (48541 48567) - (48513 48539) - (48485 48511) - (48457 48483) - (48429 48455) - (48401 48427) - (48373 48399) - (48345 48371) - (48317 48343) - (48289 48315) - (48261 48287) - (48233 48259) - (48205 48231) - (48177 48203) - (48149 48175) - (48121 48147) - (48093 48119) - (48065 48091) - (48037 48063) - (48009 48035) - (47981 48007) - (47953 47979) - (47925 47951) - (47897 47923) - (47869 47895) - (47841 47867) - (47813 47839) - (47785 47811) - (47757 47783) - (47729 47755) - (47701 47727) - (47673 47699) - (47645 47671) - (47617 47643) - (47589 47615) - (47561 47587) - (47533 47559) - (47505 47531) - (47477 47503) - (47449 47475) - (47421 47447) - (47393 47419) - (47365 47391) - (47337 47363) - (47309 47335) - (47281 47307) - (47253 47279) - (47225 47251) - (47197 47223) - (47169 47195) - (47141 47167) - (47113 47139) - (47085 47111) - (47057 47083) - (47029 47055) - (47001 47027) - (46973 46999) - (46945 46971) - (46917 46943) - (46889 46915) - (46861 46887) - (46833 46859) - (46805 46831) - (46777 46803) - (46749 46775) - (46721 46747) - (46693 46719) - (46665 46691) - (46637 46663) - (46609 46635) - (46581 46607) - (46553 46579) - (46525 46551) - (46497 46523) - (46469 46495) - (46441 46467) - (46413 46439) - (46385 46411) - (46357 46383) - (46329 46355) - (46301 46327) - (46273 46299) - (46245 46271) - (46217 46243) - (46189 46215) - (46161 46187) - (46133 46159) - (46105 46131) - (46077 46103) - (46049 46075) - (46021 46047) - (45993 46019) - (45965 45991) - (45937 45963) - (45909 45935) - (45881 45907) - (45853 45879) - (45825 45851) - (45797 45823) - (45769 45795) - (45741 45767) - (45713 45739) - (45685 45711) - (45657 45683) - (45629 45655) - (45601 45627) - (45573 45599) - (45545 45571) - (45517 45543) - (45489 45515) - (45461 45487) - (45433 45459) - (45405 45431) - (45377 45403) - (45349 45375) - (45321 45347) - (45293 45319) - (45265 45291) - (45237 45263) - (45209 45235) - (45181 45207) - (45153 45179) - (45125 45151) - (45097 45123) - (45069 45095) - (45041 45067) - (45013 45039) - (44985 45011) - (44957 44983) - (44929 44955) - (44901 44927) - (44873 44899) - (44845 44871) - (44817 44843) - (44789 44815) - (44761 44787) - (44733 44759) - (44705 44731) - (44677 44703) - (44649 44675) - (44621 44647) - (44593 44619) - (44565 44591) - (44537 44563) - (44509 44535) - (44481 44507) - (44453 44479) - (44425 44451) - (44397 44423) - (44369 44395) - (44341 44367) - (44313 44339) - (44285 44311) - (44257 44283) - (44229 44255) - (44201 44227) - (44173 44199) - (44145 44171) - (44117 44143) - (44089 44115) - (44061 44087) - (44033 44059)) - (zero-width-joiner (8205 8205)) (regional-indicator (127462 127487)) (hangul-syllable-lv (55176 55176) @@ -859,6 +472,9 @@ (44088 44088) (44060 44060) (44032 44032)) + (hangul-syllable-t (55243 55291) (4520 4607)) + (hangul-syllable-v (55216 55238) (4448 4519)) + (hangul-syllable-l (43360 43388) (4352 4447)) (extend (917760 917999) (917536 917631) @@ -1237,7 +853,6 @@ (1160 1161) (1155 1159) (768 879)) - (hangul-syllable-t (55243 55291) (4520 4607)) (spacing-mark (119149 119149) (119142 119142) @@ -1404,22 +1019,407 @@ (2366 2368) (2363 2363) (2307 2307)) - (prepend - (73474 73474) - (73030 73030) - (72324 72329) - (72250 72250) - (72001 72001) - (71999 71999) - (70082 70083) - (69837 69837) - (69821 69821) - (3406 3406) - (2274 2274) - (2192 2193) - (1807 1807) - (1757 1757) - (1536 1541))))) + (hangul-syllable-lvt + (55177 55203) + (55149 55175) + (55121 55147) + (55093 55119) + (55065 55091) + (55037 55063) + (55009 55035) + (54981 55007) + (54953 54979) + (54925 54951) + (54897 54923) + (54869 54895) + (54841 54867) + (54813 54839) + (54785 54811) + (54757 54783) + (54729 54755) + (54701 54727) + (54673 54699) + (54645 54671) + (54617 54643) + (54589 54615) + (54561 54587) + (54533 54559) + (54505 54531) + (54477 54503) + (54449 54475) + (54421 54447) + (54393 54419) + (54365 54391) + (54337 54363) + (54309 54335) + (54281 54307) + (54253 54279) + (54225 54251) + (54197 54223) + (54169 54195) + (54141 54167) + (54113 54139) + (54085 54111) + (54057 54083) + (54029 54055) + (54001 54027) + (53973 53999) + (53945 53971) + (53917 53943) + (53889 53915) + (53861 53887) + (53833 53859) + (53805 53831) + (53777 53803) + (53749 53775) + (53721 53747) + (53693 53719) + (53665 53691) + (53637 53663) + (53609 53635) + (53581 53607) + (53553 53579) + (53525 53551) + (53497 53523) + (53469 53495) + (53441 53467) + (53413 53439) + (53385 53411) + (53357 53383) + (53329 53355) + (53301 53327) + (53273 53299) + (53245 53271) + (53217 53243) + (53189 53215) + (53161 53187) + (53133 53159) + (53105 53131) + (53077 53103) + (53049 53075) + (53021 53047) + (52993 53019) + (52965 52991) + (52937 52963) + (52909 52935) + (52881 52907) + (52853 52879) + (52825 52851) + (52797 52823) + (52769 52795) + (52741 52767) + (52713 52739) + (52685 52711) + (52657 52683) + (52629 52655) + (52601 52627) + (52573 52599) + (52545 52571) + (52517 52543) + (52489 52515) + (52461 52487) + (52433 52459) + (52405 52431) + (52377 52403) + (52349 52375) + (52321 52347) + (52293 52319) + (52265 52291) + (52237 52263) + (52209 52235) + (52181 52207) + (52153 52179) + (52125 52151) + (52097 52123) + (52069 52095) + (52041 52067) + (52013 52039) + (51985 52011) + (51957 51983) + (51929 51955) + (51901 51927) + (51873 51899) + (51845 51871) + (51817 51843) + (51789 51815) + (51761 51787) + (51733 51759) + (51705 51731) + (51677 51703) + (51649 51675) + (51621 51647) + (51593 51619) + (51565 51591) + (51537 51563) + (51509 51535) + (51481 51507) + (51453 51479) + (51425 51451) + (51397 51423) + (51369 51395) + (51341 51367) + (51313 51339) + (51285 51311) + (51257 51283) + (51229 51255) + (51201 51227) + (51173 51199) + (51145 51171) + (51117 51143) + (51089 51115) + (51061 51087) + (51033 51059) + (51005 51031) + (50977 51003) + (50949 50975) + (50921 50947) + (50893 50919) + (50865 50891) + (50837 50863) + (50809 50835) + (50781 50807) + (50753 50779) + (50725 50751) + (50697 50723) + (50669 50695) + (50641 50667) + (50613 50639) + (50585 50611) + (50557 50583) + (50529 50555) + (50501 50527) + (50473 50499) + (50445 50471) + (50417 50443) + (50389 50415) + (50361 50387) + (50333 50359) + (50305 50331) + (50277 50303) + (50249 50275) + (50221 50247) + (50193 50219) + (50165 50191) + (50137 50163) + (50109 50135) + (50081 50107) + (50053 50079) + (50025 50051) + (49997 50023) + (49969 49995) + (49941 49967) + (49913 49939) + (49885 49911) + (49857 49883) + (49829 49855) + (49801 49827) + (49773 49799) + (49745 49771) + (49717 49743) + (49689 49715) + (49661 49687) + (49633 49659) + (49605 49631) + (49577 49603) + (49549 49575) + (49521 49547) + (49493 49519) + (49465 49491) + (49437 49463) + (49409 49435) + (49381 49407) + (49353 49379) + (49325 49351) + (49297 49323) + (49269 49295) + (49241 49267) + (49213 49239) + (49185 49211) + (49157 49183) + (49129 49155) + (49101 49127) + (49073 49099) + (49045 49071) + (49017 49043) + (48989 49015) + (48961 48987) + (48933 48959) + (48905 48931) + (48877 48903) + (48849 48875) + (48821 48847) + (48793 48819) + (48765 48791) + (48737 48763) + (48709 48735) + (48681 48707) + (48653 48679) + (48625 48651) + (48597 48623) + (48569 48595) + (48541 48567) + (48513 48539) + (48485 48511) + (48457 48483) + (48429 48455) + (48401 48427) + (48373 48399) + (48345 48371) + (48317 48343) + (48289 48315) + (48261 48287) + (48233 48259) + (48205 48231) + (48177 48203) + (48149 48175) + (48121 48147) + (48093 48119) + (48065 48091) + (48037 48063) + (48009 48035) + (47981 48007) + (47953 47979) + (47925 47951) + (47897 47923) + (47869 47895) + (47841 47867) + (47813 47839) + (47785 47811) + (47757 47783) + (47729 47755) + (47701 47727) + (47673 47699) + (47645 47671) + (47617 47643) + (47589 47615) + (47561 47587) + (47533 47559) + (47505 47531) + (47477 47503) + (47449 47475) + (47421 47447) + (47393 47419) + (47365 47391) + (47337 47363) + (47309 47335) + (47281 47307) + (47253 47279) + (47225 47251) + (47197 47223) + (47169 47195) + (47141 47167) + (47113 47139) + (47085 47111) + (47057 47083) + (47029 47055) + (47001 47027) + (46973 46999) + (46945 46971) + (46917 46943) + (46889 46915) + (46861 46887) + (46833 46859) + (46805 46831) + (46777 46803) + (46749 46775) + (46721 46747) + (46693 46719) + (46665 46691) + (46637 46663) + (46609 46635) + (46581 46607) + (46553 46579) + (46525 46551) + (46497 46523) + (46469 46495) + (46441 46467) + (46413 46439) + (46385 46411) + (46357 46383) + (46329 46355) + (46301 46327) + (46273 46299) + (46245 46271) + (46217 46243) + (46189 46215) + (46161 46187) + (46133 46159) + (46105 46131) + (46077 46103) + (46049 46075) + (46021 46047) + (45993 46019) + (45965 45991) + (45937 45963) + (45909 45935) + (45881 45907) + (45853 45879) + (45825 45851) + (45797 45823) + (45769 45795) + (45741 45767) + (45713 45739) + (45685 45711) + (45657 45683) + (45629 45655) + (45601 45627) + (45573 45599) + (45545 45571) + (45517 45543) + (45489 45515) + (45461 45487) + (45433 45459) + (45405 45431) + (45377 45403) + (45349 45375) + (45321 45347) + (45293 45319) + (45265 45291) + (45237 45263) + (45209 45235) + (45181 45207) + (45153 45179) + (45125 45151) + (45097 45123) + (45069 45095) + (45041 45067) + (45013 45039) + (44985 45011) + (44957 44983) + (44929 44955) + (44901 44927) + (44873 44899) + (44845 44871) + (44817 44843) + (44789 44815) + (44761 44787) + (44733 44759) + (44705 44731) + (44677 44703) + (44649 44675) + (44621 44647) + (44593 44619) + (44565 44591) + (44537 44563) + (44509 44535) + (44481 44507) + (44453 44479) + (44425 44451) + (44397 44423) + (44369 44395) + (44341 44367) + (44313 44339) + (44285 44311) + (44257 44283) + (44229 44255) + (44201 44227) + (44173 44199) + (44145 44171) + (44117 44143) + (44089 44115) + (44061 44087) + (44033 44059)) + (zero-width-joiner (8205 8205))))) (define char-set:grapheme-hangul-syllable-l (char-set)) (define char-set:grapheme-hangul-syllable-v (char-set)) diff --git a/uniseg/internal.scm b/uniseg/internal.scm index ac7eff2..d22b9ee 100644 --- a/uniseg/internal.scm +++ b/uniseg/internal.scm @@ -97,7 +97,7 @@ (cons-hash-list! hashtable property f l))) #:unwind? #t))))) - (define (output-boilerplate) + (define (print-to-file) (pretty-print `(define hashtable (alist->hashq-table ',(hash-map->list cons hashtable)))) @@ -134,7 +134,7 @@ (display "\n")) - (values process-line output-boilerplate)) + (values process-line print-to-file)) ;; Helper macro to add a list of character ranges ;; to a hash-set.