diff --git a/scripts/generate-eastasian.in b/scripts/generate-eastasian.in index a57aad3..b93f8c0 100644 --- a/scripts/generate-eastasian.in +++ b/scripts/generate-eastasian.in @@ -19,57 +19,19 @@ (define url "https://unicode.org/Public/13.0.0/ucd/EastAsianWidth.txt") -(define-peg-pattern @ea-width-prop all - (or "A" "F" "H" "Na" "N" "W")) - -(define-peg-pattern @ea-datum body - (and @codepoint-range (* @ws) (ignore ";") (* @ws) @ea-width-prop)) - -(define-peg-pattern @ea-line body - (and @ea-datum (* @ws) @comment)) - (define eastasian-ht (make-hash-table 6)) -(define (process-line line) - (define (string->property str comment) - (if (string-contains comment "COMBINING") - 'combining - (match str - ((or "W" "F") 'doublewidth) - ("H" 'halfwidth) - ("Na" 'narrow) - ("N" 'neutral) - ("A" 'ambiguous)))) +(define (string->property str comment) + (if (string-contains comment "COMBINING") + 'combining + (match str + ((or "W" "F") 'doublewidth) + ("H" 'halfwidth) + ("Na" 'narrow) + ("N" 'neutral) + ("A" 'ambiguous)))) - (define tree (peg:tree (match-pattern @ea-line line))) - - (unless (or (not tree) - (null? tree) - (eq? '@comment (car tree))) - - (match tree - (((('@codepoint-range - ('@codepoint codepoints) ...) - ('@ea-width-prop prop-str)) - ('@comment comment)) - - (with-exception-handler - (λ (err) - (format stdout "Skipping line due to error :: ") - (format-exception-msg stdout err)) - (λ () - (let ((f (hex-string->integer (first codepoints))) - (l (hex-string->integer (last codepoints))) - (width-prop (string->property prop-str comment))) - - (when (or (in-surrogate-range f) - (in-surrogate-range l)) - (error (format #f "chars in surrogate range ~x -> ~x" f l))) - - (cons-hash-list! eastasian-ht width-prop f l))) - #:unwind? 
#t))))) - -(define ea-sets +(define eastasian-properties '(combining doublewidth halfwidth @@ -77,18 +39,14 @@ neutral ambiguous)) -(define ea-symbol-names +(define eastasian-symbols (map (λ (set) (string->symbol (string-concatenate (list "char-set:eastasian-" (symbol->string set))))) - ea-sets)) - -(define ea-sets-and-symbols - (zip ea-sets ea-symbol-names)) - + eastasian-properties)) (define file "uniseg/eastasian.scm") @@ -98,49 +56,25 @@ (λ () (format #t ";; Code generated by ~a. DO NOT EDIT\n\n" (basename (current-filename))) - (for-each process-line (cmdline-wget-or-file url stdout)) - (pretty-print `(define-module (uniseg eastasian) + #:use-module (uniseg internal) #:use-module (ice-9 hash-table) #:use-module (srfi srfi-1) - #:use-module (uniseg internal) - #:export (,@ea-symbol-names - eastasian-charsets))) + #:export (,@eastasian-symbols + eastasian-charsets))) - (pretty-print - `(define eastasian-ht - (alist->hashq-table ',(hash-map->list cons eastasian-ht)))) + (define-values (process-line output-boilerplate) + (make-line-processor + eastasian-ht + string->property + eastasian-properties + eastasian-symbols + 'eastasian-charsets + stdout)) - (display "\n") - - (for-each - (λ (sym) - (pretty-print - `(define ,sym (char-set)))) - ea-symbol-names) - - (display "\n") - - (pretty-print - `(define eastasian-charsets - (list - ,@(map - (λ (pair) - (let ((f (first pair)) - (s (second pair))) - `(list ',f ,s))) - ea-sets-and-symbols)))) - - (display "\n") - - (for-each - (λ (set-pair) - (let ((name (first set-pair)) - (symbol (second set-pair))) - (pretty-print - `(ranges->charset! 
eastasian-ht ',name ,symbol)))) - ea-sets-and-symbols) + (for-each process-line (cmdline-wget-or-file url stdout)) + (output-boilerplate) (display "Code generation complete.\n" stdout))) diff --git a/scripts/generate-emoji.in b/scripts/generate-emoji.in index 8879ca0..f39afe4 100644 --- a/scripts/generate-emoji.in +++ b/scripts/generate-emoji.in @@ -8,8 +8,6 @@ (uniseg internal) (ice-9 pretty-print) (ice-9 peg) - (ice-9 format) - (ice-9 exceptions) (ice-9 match) (srfi srfi-1)) @@ -18,18 +16,9 @@ (define url "https://unicode.org/Public/13.0.0/ucd/emoji/emoji-data.txt") -(define-peg-pattern @emoji-category all - (* (peg "[a-zA-Z_]"))) - -(define-peg-pattern @emoji-datum body - (and @codepoint-range (* @ws) (ignore ";") (* @ws) @emoji-category)) - -(define-peg-pattern @emoji-line body - (and @emoji-datum (* @ws) @comment)) - (define emoji-ht (make-hash-table 5)) -(define emoji-sets +(define emoji-properties '(emoji emoji-presentation emoji-modifier @@ -37,55 +26,23 @@ emoji-component emoji-extended-pictographic)) -(define emoji-symbol-names +(define emoji-symbols (map (λ (set) (string->symbol (string-concatenate (list "char-set:" (symbol->string set))))) - emoji-sets)) + emoji-properties)) -(define emoji-sets-and-symbols - (zip emoji-sets emoji-symbol-names)) - -(define (process-line line) - (define (string->category str) - (match str - ("Emoji" 'emoji) - ("Emoji_Presentation" 'emoji-presentation) - ("Emoji_Modifier" 'emoji-modifier) - ("Emoji_Modifier_Base" 'emoji-modifier-base) - ("Emoji_Component" 'emoji-component) - ("Extended_Pictographic" 'emoji-extended-pictographic))) - - (define tree (peg:tree (match-pattern @emoji-line line))) - - (unless (or (not tree) - (null? tree) - (eq? '@comment (car tree))) - - (match tree - (((('@codepoint-range - ('@codepoint codepoints) ...) 
- ('@emoji-category cat-str)) - ('@comment comment)) - - (with-exception-handler - (λ (err) - (format stdout "Skipping line due to error :: ") - (format-exception-msg stdout err)) - (λ () - (let ((f (hex-string->integer (first codepoints))) - (l (hex-string->integer (last codepoints))) - (category (string->category cat-str))) - - (when (or (in-surrogate-range f) - (in-surrogate-range l)) - (error (format #f "chars in surrogate range ~x -> ~x" f l))) - - (cons-hash-list! emoji-ht category f l))) - #:unwind? #t))))) +(define (string->property str comment) + (match str + ("Emoji" 'emoji) + ("Emoji_Presentation" 'emoji-presentation) + ("Emoji_Modifier" 'emoji-modifier) + ("Emoji_Modifier_Base" 'emoji-modifier-base) + ("Emoji_Component" 'emoji-component) + ("Extended_Pictographic" 'emoji-extended-pictographic))) (define file "uniseg/emoji.scm") @@ -95,49 +52,25 @@ (λ () (format #t ";; Code generated by ~a. DO NOT EDIT\n\n" (basename (current-filename))) - (for-each process-line (cmdline-wget-or-file url stdout)) - (pretty-print `(define-module (uniseg emoji) #:use-module (uniseg internal) #:use-module (ice-9 hash-table) #:use-module (srfi srfi-1) - #:export (,@emoji-symbol-names + #:export (,@emoji-symbols emoji-charsets))) - (pretty-print - `(define emoji-ht - (alist->hashq-table ',(hash-map->list cons emoji-ht)))) + (define-values (process-line output-boilerplate) + (make-line-processor + emoji-ht + string->property + emoji-properties + emoji-symbols + 'emoji-charsets + stdout)) - (display "\n") - - (for-each - (λ (sym) - (pretty-print - `(define ,sym (char-set)))) - emoji-symbol-names) - - (display "\n") - - (pretty-print - `(define emoji-charsets - (list - ,@(map - (λ (pair) - (let ((f (first pair)) - (s (second pair))) - `(list ',f ,s))) - emoji-sets-and-symbols)))) - - (display "\n") - - (for-each - (λ (set-pair) - (let ((name (first set-pair)) - (symbol (second set-pair))) - (pretty-print - `(ranges->charset! 
emoji-ht ',name ,symbol)))) - emoji-sets-and-symbols) + (for-each process-line (cmdline-wget-or-file url stdout)) + (output-boilerplate) (display "Code generation complete.\n" stdout))) diff --git a/scripts/generate-graphemes.in b/scripts/generate-graphemes.in index cbbe0a9..9e9d112 100644 --- a/scripts/generate-graphemes.in +++ b/scripts/generate-graphemes.in @@ -19,18 +19,9 @@ (define url "https://www.unicode.org/Public/15.0.0/ucd/auxiliary/GraphemeBreakProperty.txt") -(define-peg-pattern @grapheme-category all - (* (peg "[a-zA-Z_]"))) - -(define-peg-pattern @grapheme-datum body - (and @codepoint-range (* @ws) (ignore ";") (* @ws) @grapheme-category)) - -(define-peg-pattern @grapheme-line body - (and @grapheme-datum (* @ws) @comment)) - (define grapheme-ht (make-hash-table 13)) -(define grapheme-sets +(define grapheme-properties '(hangul-syllable-l hangul-syllable-v hangul-syllable-lv @@ -44,63 +35,30 @@ spacing-mark zero-width-joiner)) -(define grapheme-symbol-names +(define grapheme-symbols (map (λ (set) (string->symbol (string-concatenate (list "char-set:grapheme-" (symbol->string set))))) - grapheme-sets)) - -(define grapheme-sets-and-symbols - (zip grapheme-sets grapheme-symbol-names)) - -(define (process-line line) - (define (string->category str) - (match str - ("L" 'hangul-syllable-l) - ("V" 'hangul-syllable-v) - ("T" 'hangul-syllable-t) - ("LV" 'hangul-syllable-lv) - ("LVT" 'hangul-syllable-lvt) - ("Prepend" 'prepend) - ("CR" 'carriage-return) - ("LF" 'line-feed) - ("Control" 'control) - ("Extend" 'extend) - ("Regional_Indicator" 'regional-indicator) - ("SpacingMark" 'spacing-mark) - ("ZWJ" 'zero-width-joiner))) - - (define tree (peg:tree (match-pattern @grapheme-line line))) - - (unless (or (not tree) - (null? tree) - (eq? '@comment (car tree))) - - (match tree - (((('@codepoint-range - ('@codepoint codepoints) ...) 
- ('@grapheme-category cat-str)) - ('@comment comment)) - - (with-exception-handler - (λ (err) - (format stdout "Skipping line due to error :: ") - (format-exception-msg stdout err)) - (λ () - (let ((f (hex-string->integer (first codepoints))) - (l (hex-string->integer (last codepoints))) - (category (string->category cat-str))) - - (when (or (in-surrogate-range f) - (in-surrogate-range l)) - (error (format #f "chars in surrogate range ~x -> ~x" f l))) - - (cons-hash-list! grapheme-ht category f l))) - #:unwind? #t))))) + grapheme-properties)) +(define (string->property str comment) + (match str + ("L" 'hangul-syllable-l) + ("V" 'hangul-syllable-v) + ("T" 'hangul-syllable-t) + ("LV" 'hangul-syllable-lv) + ("LVT" 'hangul-syllable-lvt) + ("Prepend" 'prepend) + ("CR" 'carriage-return) + ("LF" 'line-feed) + ("Control" 'control) + ("Extend" 'extend) + ("Regional_Indicator" 'regional-indicator) + ("SpacingMark" 'spacing-mark) + ("ZWJ" 'zero-width-joiner))) (define file "uniseg/graphemes.scm") @@ -110,53 +68,32 @@ (λ () (format #t ";; Code generated by ~a. DO NOT EDIT\n\n" (basename (current-filename))) - (for-each process-line (cmdline-wget-or-file url stdout)) - (pretty-print `(define-module (uniseg graphemes) #:use-module (ice-9 hash-table) #:use-module (srfi srfi-1) #:use-module (uniseg internal) #:use-module (uniseg emoji) - #:export (,@grapheme-symbol-names + #:export (,@grapheme-symbols grapheme-charsets))) + (define-values (process-line output-boilerplate) + (make-line-processor + grapheme-ht + string->property + grapheme-properties + grapheme-symbols + 'grapheme-charsets + stdout)) + + (for-each process-line (cmdline-wget-or-file url stdout)) + (output-boilerplate) + + ;; Need emoji in the set as well. 
(pretty-print - `(define grapheme-ht - (alist->hashq-table ',(hash-map->list cons grapheme-ht)))) - - (display "\n") - - (for-each - (λ (sym) - (pretty-print - `(define ,sym (char-set)))) - grapheme-symbol-names) - - (display "\n") - - (pretty-print - `(define grapheme-charsets - (list - ,@(map - (λ (pair) - (let ((f (first pair)) - (s (second pair))) - `(list ',f ,s))) - grapheme-sets-and-symbols) - ;; Need emoji in this set too! - (list 'extended-pictographic char-set:emoji-extended-pictographic)))) - - - (display "\n") - - (for-each - (λ (set-pair) - (let ((name (first set-pair)) - (symbol (second set-pair))) - (pretty-print - `(ranges->charset! grapheme-ht ',name ,symbol)))) - grapheme-sets-and-symbols) + `(set! grapheme-charsets + (cons (list 'extended-pictographic char-set:emoji-extended-pictographic) + grapheme-charsets))) (display "Code generation complete.\n" stdout))) diff --git a/uniseg/eastasian.scm b/uniseg/eastasian.scm index b024247..9db505d 100644 --- a/uniseg/eastasian.scm +++ b/uniseg/eastasian.scm @@ -3,11 +3,11 @@ (define-module (uniseg eastasian) #:use-module + (uniseg internal) + #:use-module (ice-9 hash-table) #:use-module (srfi srfi-1) - #:use-module - (uniseg internal) #:export (char-set:eastasian-combining char-set:eastasian-doublewidth @@ -16,285 +16,26 @@ char-set:eastasian-neutral char-set:eastasian-ambiguous eastasian-charsets)) -(define eastasian-ht +(define hashtable (alist->hashq-table - '((doublewidth - (201547 262141) - (196608 201546) - (195104 196605) - (195102 195103) - (194560 195101) - (191457 194559) - (183984 191456) - (183970 183983) - (178208 183969) - (178206 178207) - (177984 178205) - (177973 177983) - (173824 177972) - (173790 173823) - (131072 173789) - (129744 129750) - (129728 129730) - (129712 129718) - (129680 129704) - (129664 129670) - (129656 129658) - (129648 129652) - (129485 129535) - (129402 129483) - (129351 129400) - (129340 129349) - (129292 129338) - (128992 129003) - (128756 128764) - (128747 128748) 
- (128725 128727) - (128720 128722) - (128716 128716) - (128640 128709) - (128512 128591) - (128507 128511) - (128420 128420) - (128405 128406) - (128378 128378) - (128336 128359) - (128331 128334) - (128255 128317) - (128066 128252) - (128064 128064) - (128000 128062) - (127995 127999) - (127992 127994) - (127988 127988) - (127968 127984) - (127951 127955) - (127904 127946) - (127870 127891) - (127799 127868) - (127789 127797) - (127744 127776) - (127584 127589) - (127568 127569) - (127552 127560) - (127504 127547) - (127488 127490) - (127377 127386) - (127374 127374) - (127183 127183) - (126980 126980) - (110960 111355) - (110948 110951) - (110928 110930) - (110848 110878) - (110592 110847) - (101632 101640) - (101120 101589) - (100352 101119) - (94208 100343) - (94192 94193) - (94180 94180) - (94179 94179) - (94178 94178) - (94176 94177) - (65509 65510) - (65508 65508) - (65507 65507) - (65506 65506) - (65504 65505) - (65376 65376) - (65375 65375) - (65374 65374) - (65373 65373) - (65372 65372) - (65371 65371) - (65345 65370) - (65344 65344) - (65343 65343) - (65342 65342) - (65341 65341) - (65340 65340) - (65339 65339) - (65313 65338) - (65311 65312) - (65308 65310) - (65306 65307) - (65296 65305) - (65294 65295) - (65293 65293) - (65292 65292) - (65291 65291) - (65290 65290) - (65289 65289) - (65288 65288) - (65285 65287) - (65284 65284) - (65281 65283) - (65130 65131) - (65129 65129) - (65128 65128) - (65124 65126) - (65123 65123) - (65122 65122) - (65119 65121) - (65118 65118) - (65117 65117) - (65116 65116) - (65115 65115) - (65114 65114) - (65113 65113) - (65112 65112) - (65108 65111) - (65104 65106) - (65101 65103) - (65097 65100) - (65096 65096) - (65095 65095) - (65093 65094) - (65092 65092) - (65091 65091) - (65090 65090) - (65089 65089) - (65088 65088) - (65087 65087) - (65086 65086) - (65085 65085) - (65084 65084) - (65083 65083) - (65082 65082) - (65081 65081) - (65080 65080) - (65079 65079) - (65078 65078) - (65077 65077) - (65075 65076) - (65073 
65074) - (65072 65072) - (65049 65049) - (65048 65048) - (65047 65047) - (65040 65046) - (64218 64255) - (64112 64217) - (64110 64111) - (63744 64109) - (44032 55203) - (43360 43388) - (42128 42182) - (40982 42124) - (40981 40981) - (40960 40980) - (40957 40959) - (19968 40956) - (13312 19903) - (13056 13311) - (12992 13055) - (12977 12991) - (12938 12976) - (12928 12937) - (12896 12927) - (12881 12895) - (12880 12880) - (12842 12871) - (12832 12841) - (12800 12830) - (12784 12799) - (12736 12771) - (12704 12735) - (12694 12703) - (12690 12693) - (12688 12689) - (12593 12686) - (12549 12591) - (12543 12543) - (12540 12542) - (12539 12539) - (12449 12538) - (12448 12448) - (12447 12447) - (12445 12446) - (12443 12444) - (12353 12438) - (12350 12350) - (12349 12349) - (12348 12348) - (12347 12347) - (12344 12346) - (12342 12343) - (12337 12341) - (12336 12336) - (12334 12335) - (12330 12333) - (12321 12329) - (12320 12320) - (12318 12319) - (12317 12317) - (12316 12316) - (12315 12315) - (12314 12314) - (12313 12313) - (12312 12312) - (12311 12311) - (12310 12310) - (12309 12309) - (12308 12308) - (12306 12307) - (12305 12305) - (12304 12304) - (12303 12303) - (12302 12302) - (12301 12301) - (12300 12300) - (12299 12299) - (12298 12298) - (12297 12297) - (12296 12296) - (12295 12295) - (12294 12294) - (12293 12293) - (12292 12292) - (12289 12291) - (12288 12288) - (12272 12283) - (12032 12245) - (11931 12019) - (11904 11929) - (11093 11093) - (11088 11088) - (11035 11036) - (10175 10175) - (10160 10160) - (10133 10135) - (10071 10071) - (10067 10069) - (10062 10062) - (10060 10060) - (10024 10024) - (9994 9995) - (9989 9989) - (9981 9981) - (9978 9978) - (9973 9973) - (9970 9971) - (9962 9962) - (9940 9940) - (9934 9934) - (9924 9925) - (9917 9918) - (9898 9899) - (9889 9889) - (9875 9875) - (9855 9855) - (9800 9811) - (9748 9749) - (9725 9726) - (9203 9203) - (9200 9200) - (9193 9196) - (9002 9002) - (9001 9001) - (8986 8987) - (4352 4447)) + '((halfwidth + (65517 
65518) + (65513 65516) + (65512 65512) + (65498 65500) + (65490 65495) + (65482 65487) + (65474 65479) + (65440 65470) + (65438 65439) + (65393 65437) + (65392 65392) + (65382 65391) + (65380 65381) + (65379 65379) + (65378 65378) + (65377 65377) + (8361 8361)) (ambiguous (1048576 1114109) (983040 1048573) @@ -493,120 +234,6 @@ (167 167) (164 164) (161 161)) - (combining - (125136 125142) - (122918 122922) - (122915 122916) - (122907 122913) - (122888 122904) - (122880 122886) - (119362 119364) - (119210 119213) - (119173 119179) - (119163 119170) - (119149 119154) - (119143 119145) - (119141 119142) - (92912 92916) - (70512 70516) - (70502 70508) - (70459 70460) - (70400 70401) - (69446 69456) - (69291 69292) - (66422 66426) - (66045 66045) - (65056 65071) - (43232 43249) - (42736 42737) - (42654 42655) - (42612 42621) - (42608 42610) - (42607 42607) - (12441 12442) - (11744 11775) - (11503 11505) - (8421 8432) - (8418 8420) - (8417 8417) - (8413 8416) - (8400 8412) - (7675 7679) - (7616 7673) - (7019 7027) - (6847 6848) - (6846 6846) - (6832 6845) - (6783 6783) - (4957 4959) - (3328 3329) - (3076 3076) - (3072 3072) - (2027 2035) - (1160 1161) - (1155 1159) - (768 879)) - (halfwidth - (65517 65518) - (65513 65516) - (65512 65512) - (65498 65500) - (65490 65495) - (65482 65487) - (65474 65479) - (65440 65470) - (65438 65439) - (65393 65437) - (65392 65392) - (65382 65391) - (65380 65381) - (65379 65379) - (65378 65378) - (65377 65377) - (8361 8361)) - (narrow - (10630 10630) - (10629 10629) - (10221 10221) - (10220 10220) - (10219 10219) - (10218 10218) - (10217 10217) - (10216 10216) - (10215 10215) - (10214 10214) - (175 175) - (172 172) - (166 166) - (165 165) - (162 163) - (126 126) - (125 125) - (124 124) - (123 123) - (97 122) - (96 96) - (95 95) - (94 94) - (93 93) - (92 92) - (91 91) - (65 90) - (63 64) - (60 62) - (58 59) - (48 57) - (46 47) - (45 45) - (44 44) - (43 43) - (42 42) - (41 41) - (40 40) - (37 39) - (36 36) - (33 35) - (32 32)) (neutral 
(917536 917631) (917505 917505) @@ -2498,7 +2125,380 @@ (160 160) (128 159) (127 127) - (0 31))))) + (0 31)) + (doublewidth + (201547 262141) + (196608 201546) + (195104 196605) + (195102 195103) + (194560 195101) + (191457 194559) + (183984 191456) + (183970 183983) + (178208 183969) + (178206 178207) + (177984 178205) + (177973 177983) + (173824 177972) + (173790 173823) + (131072 173789) + (129744 129750) + (129728 129730) + (129712 129718) + (129680 129704) + (129664 129670) + (129656 129658) + (129648 129652) + (129485 129535) + (129402 129483) + (129351 129400) + (129340 129349) + (129292 129338) + (128992 129003) + (128756 128764) + (128747 128748) + (128725 128727) + (128720 128722) + (128716 128716) + (128640 128709) + (128512 128591) + (128507 128511) + (128420 128420) + (128405 128406) + (128378 128378) + (128336 128359) + (128331 128334) + (128255 128317) + (128066 128252) + (128064 128064) + (128000 128062) + (127995 127999) + (127992 127994) + (127988 127988) + (127968 127984) + (127951 127955) + (127904 127946) + (127870 127891) + (127799 127868) + (127789 127797) + (127744 127776) + (127584 127589) + (127568 127569) + (127552 127560) + (127504 127547) + (127488 127490) + (127377 127386) + (127374 127374) + (127183 127183) + (126980 126980) + (110960 111355) + (110948 110951) + (110928 110930) + (110848 110878) + (110592 110847) + (101632 101640) + (101120 101589) + (100352 101119) + (94208 100343) + (94192 94193) + (94180 94180) + (94179 94179) + (94178 94178) + (94176 94177) + (65509 65510) + (65508 65508) + (65507 65507) + (65506 65506) + (65504 65505) + (65376 65376) + (65375 65375) + (65374 65374) + (65373 65373) + (65372 65372) + (65371 65371) + (65345 65370) + (65344 65344) + (65343 65343) + (65342 65342) + (65341 65341) + (65340 65340) + (65339 65339) + (65313 65338) + (65311 65312) + (65308 65310) + (65306 65307) + (65296 65305) + (65294 65295) + (65293 65293) + (65292 65292) + (65291 65291) + (65290 65290) + (65289 65289) + (65288 65288) + 
(65285 65287) + (65284 65284) + (65281 65283) + (65130 65131) + (65129 65129) + (65128 65128) + (65124 65126) + (65123 65123) + (65122 65122) + (65119 65121) + (65118 65118) + (65117 65117) + (65116 65116) + (65115 65115) + (65114 65114) + (65113 65113) + (65112 65112) + (65108 65111) + (65104 65106) + (65101 65103) + (65097 65100) + (65096 65096) + (65095 65095) + (65093 65094) + (65092 65092) + (65091 65091) + (65090 65090) + (65089 65089) + (65088 65088) + (65087 65087) + (65086 65086) + (65085 65085) + (65084 65084) + (65083 65083) + (65082 65082) + (65081 65081) + (65080 65080) + (65079 65079) + (65078 65078) + (65077 65077) + (65075 65076) + (65073 65074) + (65072 65072) + (65049 65049) + (65048 65048) + (65047 65047) + (65040 65046) + (64218 64255) + (64112 64217) + (64110 64111) + (63744 64109) + (44032 55203) + (43360 43388) + (42128 42182) + (40982 42124) + (40981 40981) + (40960 40980) + (40957 40959) + (19968 40956) + (13312 19903) + (13056 13311) + (12992 13055) + (12977 12991) + (12938 12976) + (12928 12937) + (12896 12927) + (12881 12895) + (12880 12880) + (12842 12871) + (12832 12841) + (12800 12830) + (12784 12799) + (12736 12771) + (12704 12735) + (12694 12703) + (12690 12693) + (12688 12689) + (12593 12686) + (12549 12591) + (12543 12543) + (12540 12542) + (12539 12539) + (12449 12538) + (12448 12448) + (12447 12447) + (12445 12446) + (12443 12444) + (12353 12438) + (12350 12350) + (12349 12349) + (12348 12348) + (12347 12347) + (12344 12346) + (12342 12343) + (12337 12341) + (12336 12336) + (12334 12335) + (12330 12333) + (12321 12329) + (12320 12320) + (12318 12319) + (12317 12317) + (12316 12316) + (12315 12315) + (12314 12314) + (12313 12313) + (12312 12312) + (12311 12311) + (12310 12310) + (12309 12309) + (12308 12308) + (12306 12307) + (12305 12305) + (12304 12304) + (12303 12303) + (12302 12302) + (12301 12301) + (12300 12300) + (12299 12299) + (12298 12298) + (12297 12297) + (12296 12296) + (12295 12295) + (12294 12294) + (12293 12293) + 
(12292 12292) + (12289 12291) + (12288 12288) + (12272 12283) + (12032 12245) + (11931 12019) + (11904 11929) + (11093 11093) + (11088 11088) + (11035 11036) + (10175 10175) + (10160 10160) + (10133 10135) + (10071 10071) + (10067 10069) + (10062 10062) + (10060 10060) + (10024 10024) + (9994 9995) + (9989 9989) + (9981 9981) + (9978 9978) + (9973 9973) + (9970 9971) + (9962 9962) + (9940 9940) + (9934 9934) + (9924 9925) + (9917 9918) + (9898 9899) + (9889 9889) + (9875 9875) + (9855 9855) + (9800 9811) + (9748 9749) + (9725 9726) + (9203 9203) + (9200 9200) + (9193 9196) + (9002 9002) + (9001 9001) + (8986 8987) + (4352 4447)) + (narrow + (10630 10630) + (10629 10629) + (10221 10221) + (10220 10220) + (10219 10219) + (10218 10218) + (10217 10217) + (10216 10216) + (10215 10215) + (10214 10214) + (175 175) + (172 172) + (166 166) + (165 165) + (162 163) + (126 126) + (125 125) + (124 124) + (123 123) + (97 122) + (96 96) + (95 95) + (94 94) + (93 93) + (92 92) + (91 91) + (65 90) + (63 64) + (60 62) + (58 59) + (48 57) + (46 47) + (45 45) + (44 44) + (43 43) + (42 42) + (41 41) + (40 40) + (37 39) + (36 36) + (33 35) + (32 32)) + (combining + (125136 125142) + (122918 122922) + (122915 122916) + (122907 122913) + (122888 122904) + (122880 122886) + (119362 119364) + (119210 119213) + (119173 119179) + (119163 119170) + (119149 119154) + (119143 119145) + (119141 119142) + (92912 92916) + (70512 70516) + (70502 70508) + (70459 70460) + (70400 70401) + (69446 69456) + (69291 69292) + (66422 66426) + (66045 66045) + (65056 65071) + (43232 43249) + (42736 42737) + (42654 42655) + (42612 42621) + (42608 42610) + (42607 42607) + (12441 12442) + (11744 11775) + (11503 11505) + (8421 8432) + (8418 8420) + (8417 8417) + (8413 8416) + (8400 8412) + (7675 7679) + (7616 7673) + (7019 7027) + (6847 6848) + (6846 6846) + (6832 6845) + (6783 6783) + (4957 4959) + (3328 3329) + (3076 3076) + (3072 3072) + (2027 2035) + (1160 1161) + (1155 1159) + (768 879))))) (define 
char-set:eastasian-combining (char-set)) (define char-set:eastasian-doublewidth (char-set)) @@ -2515,9 +2515,10 @@ (list 'neutral char-set:eastasian-neutral) (list 'ambiguous char-set:eastasian-ambiguous))) -(ranges->charset! eastasian-ht 'combining char-set:eastasian-combining) -(ranges->charset! eastasian-ht 'doublewidth char-set:eastasian-doublewidth) -(ranges->charset! eastasian-ht 'halfwidth char-set:eastasian-halfwidth) -(ranges->charset! eastasian-ht 'narrow char-set:eastasian-narrow) -(ranges->charset! eastasian-ht 'neutral char-set:eastasian-neutral) -(ranges->charset! eastasian-ht 'ambiguous char-set:eastasian-ambiguous) +(ranges->charset! hashtable 'combining char-set:eastasian-combining) +(ranges->charset! hashtable 'doublewidth char-set:eastasian-doublewidth) +(ranges->charset! hashtable 'halfwidth char-set:eastasian-halfwidth) +(ranges->charset! hashtable 'narrow char-set:eastasian-narrow) +(ranges->charset! hashtable 'neutral char-set:eastasian-neutral) +(ranges->charset! 
hashtable 'ambiguous char-set:eastasian-ambiguous) + diff --git a/uniseg/emoji.scm b/uniseg/emoji.scm index 8093044..d480cdd 100644 --- a/uniseg/emoji.scm +++ b/uniseg/emoji.scm @@ -16,7 +16,7 @@ char-set:emoji-component char-set:emoji-extended-pictographic emoji-charsets)) -(define emoji-ht +(define hashtable (alist->hashq-table '((emoji-presentation (129744 129750) @@ -280,6 +280,437 @@ (9200 9200) (9193 9196) (8986 8987)) + (emoji-modifier-base + (129489 129501) + (129485 129487) + (129467 129467) + (129464 129465) + (129461 129462) + (129399 129399) + (129340 129342) + (129331 129337) + (129329 129330) + (129328 129328) + (129318 129318) + (129311 129311) + (129305 129310) + (129304 129304) + (129295 129295) + (129292 129292) + (128716 128716) + (128704 128704) + (128694 128694) + (128692 128693) + (128675 128675) + (128587 128591) + (128581 128583) + (128405 128406) + (128400 128400) + (128378 128378) + (128372 128373) + (128170 128170) + (128145 128145) + (128143 128143) + (128133 128135) + (128129 128131) + (128124 128124) + (128110 128120) + (128108 128109) + (128102 128107) + (128070 128080) + (128066 128067) + (127947 127948) + (127946 127946) + (127943 127943) + (127938 127940) + (127877 127877) + (9997 9997) + (9994 9996) + (9977 9977) + (9757 9757)) + (emoji (129744 129750) + (129728 129730) + (129712 129718) + (129686 129704) + (129680 129685) + (129667 129670) + (129664 129666) + (129656 129658) + (129652 129652) + (129648 129651) + (129511 129535) + (129488 129510) + (129485 129487) + (129483 129483) + (129475 129482) + (129473 129474) + (129472 129472) + (129466 129471) + (129456 129465) + (129454 129455) + (129451 129453) + (129445 129450) + (129443 129444) + (129432 129442) + (129426 129431) + (129413 129425) + (129408 129412) + (129404 129407) + (129403 129403) + (129402 129402) + (129399 129400) + (129395 129398) + (129394 129394) + (129393 129393) + (129388 129392) + (129375 129387) + (129360 129374) + (129357 129359) + (129356 129356) + 
(129351 129355) + (129344 129349) + (129343 129343) + (129340 129342) + (129331 129338) + (129329 129330) + (129328 129328) + (129320 129327) + (129312 129319) + (129311 129311) + (129305 129310) + (129296 129304) + (129293 129295) + (129292 129292) + (128992 129003) + (128763 128764) + (128762 128762) + (128761 128761) + (128759 128760) + (128756 128758) + (128755 128755) + (128752 128752) + (128747 128748) + (128745 128745) + (128736 128741) + (128726 128727) + (128725 128725) + (128721 128722) + (128720 128720) + (128717 128719) + (128716 128716) + (128715 128715) + (128705 128709) + (128704 128704) + (128703 128703) + (128697 128702) + (128695 128696) + (128694 128694) + (128691 128693) + (128690 128690) + (128686 128689) + (128679 128685) + (128678 128678) + (128676 128677) + (128675 128675) + (128674 128674) + (128667 128673) + (128665 128666) + (128664 128664) + (128663 128663) + (128662 128662) + (128661 128661) + (128660 128660) + (128657 128659) + (128656 128656) + (128655 128655) + (128654 128654) + (128653 128653) + (128652 128652) + (128650 128651) + (128649 128649) + (128648 128648) + (128647 128647) + (128646 128646) + (128643 128645) + (128641 128642) + (128640 128640) + (128581 128591) + (128577 128580) + (128567 128576) + (128566 128566) + (128565 128565) + (128564 128564) + (128560 128563) + (128558 128559) + (128557 128557) + (128556 128556) + (128552 128555) + (128550 128551) + (128544 128549) + (128543 128543) + (128540 128542) + (128539 128539) + (128538 128538) + (128537 128537) + (128536 128536) + (128535 128535) + (128534 128534) + (128533 128533) + (128530 128532) + (128529 128529) + (128528 128528) + (128527 128527) + (128526 128526) + (128521 128525) + (128519 128520) + (128513 128518) + (128512 128512) + (128507 128511) + (128506 128506) + (128499 128499) + (128495 128495) + (128488 128488) + (128483 128483) + (128481 128481) + (128476 128478) + (128465 128467) + (128450 128452) + (128444 128444) + (128433 128434) + (128424 128424) + 
(128421 128421) + (128420 128420) + (128405 128406) + (128400 128400) + (128394 128397) + (128391 128391) + (128378 128378) + (128371 128377) + (128367 128368) + (128348 128359) + (128336 128347) + (128331 128334) + (128329 128330) + (128302 128317) + (128300 128301) + (128278 128299) + (128277 128277) + (128266 128276) + (128265 128265) + (128264 128264) + (128260 128263) + (128259 128259) + (128255 128258) + (128253 128253) + (128249 128252) + (128248 128248) + (128246 128247) + (128245 128245) + (128240 128244) + (128239 128239) + (128238 128238) + (128236 128237) + (128184 128235) + (128182 128183) + (128174 128181) + (128173 128173) + (128110 128172) + (128108 128109) + (128102 128107) + (128101 128101) + (128066 128100) + (128065 128065) + (128064 128064) + (128063 128063) + (128043 128062) + (128042 128042) + (128023 128041) + (128022 128022) + (128021 128021) + (128020 128020) + (128019 128019) + (128017 128018) + (128015 128016) + (128012 128014) + (128009 128011) + (128008 128008) + (127992 128007) + (127991 127991) + (127989 127989) + (127988 127988) + (127987 127987) + (127973 127984) + (127972 127972) + (127968 127971) + (127956 127967) + (127951 127955) + (127947 127950) + (127946 127946) + (127945 127945) + (127944 127944) + (127943 127943) + (127942 127942) + (127941 127941) + (127904 127940) + (127902 127903) + (127897 127899) + (127894 127895) + (127872 127891) + (127870 127871) + (127869 127869) + (127868 127868) + (127825 127867) + (127824 127824) + (127820 127823) + (127819 127819) + (127799 127818) + (127798 127798) + (127796 127797) + (127794 127795) + (127792 127793) + (127789 127791) + (127780 127788) + (127777 127777) + (127775 127776) + (127773 127774) + (127772 127772) + (127771 127771) + (127770 127770) + (127769 127769) + (127766 127768) + (127763 127765) + (127762 127762) + (127761 127761) + (127760 127760) + (127759 127759) + (127757 127758) + (127744 127756) + (127568 127569) + (127538 127546) + (127535 127535) + (127514 127514) + 
(127489 127490) + (127462 127487) + (127377 127386) + (127374 127374) + (127358 127359) + (127344 127345) + (127183 127183) + (126980 126980) + (12953 12953) + (12951 12951) + (12349 12349) + (12336 12336) + (11093 11093) + (11088 11088) + (11035 11036) + (11013 11015) + (10548 10549) + (10175 10175) + (10160 10160) + (10145 10145) + (10133 10135) + (10084 10084) + (10083 10083) + (10071 10071) + (10067 10069) + (10062 10062) + (10060 10060) + (10055 10055) + (10052 10052) + (10035 10036) + (10024 10024) + (10017 10017) + (10013 10013) + (10006 10006) + (10004 10004) + (10002 10002) + (9999 9999) + (9997 9997) + (9992 9996) + (9989 9989) + (9986 9986) + (9981 9981) + (9978 9978) + (9975 9977) + (9973 9973) + (9972 9972) + (9970 9971) + (9968 9969) + (9962 9962) + (9961 9961) + (9940 9940) + (9939 9939) + (9937 9937) + (9935 9935) + (9934 9934) + (9928 9928) + (9924 9925) + (9917 9918) + (9904 9905) + (9898 9899) + (9895 9895) + (9888 9889) + (9883 9884) + (9881 9881) + (9878 9879) + (9877 9877) + (9876 9876) + (9875 9875) + (9874 9874) + (9855 9855) + (9854 9854) + (9851 9851) + (9832 9832) + (9829 9830) + (9827 9827) + (9824 9824) + (9823 9823) + (9800 9811) + (9794 9794) + (9792 9792) + (9786 9786) + (9784 9785) + (9775 9775) + (9774 9774) + (9770 9770) + (9766 9766) + (9762 9763) + (9760 9760) + (9757 9757) + (9752 9752) + (9748 9749) + (9745 9745) + (9742 9742) + (9732 9732) + (9730 9731) + (9728 9729) + (9723 9726) + (9664 9664) + (9654 9654) + (9642 9643) + (9410 9410) + (9208 9210) + (9203 9203) + (9201 9202) + (9200 9200) + (9199 9199) + (9197 9198) + (9193 9196) + (9167 9167) + (9000 9000) + (8986 8987) + (8617 8618) + (8596 8601) + (8505 8505) + (8482 8482) + (8265 8265) + (8252 8252) + (174 174) + (169 169) + (48 57) + (42 42) + (35 35)) (emoji-extended-pictographic (130048 131069) (129751 129791) @@ -772,438 +1203,6 @@ (8252 8252) (174 174) (169 169)) - (emoji-modifier-base - (129489 129501) - (129485 129487) - (129467 129467) - (129464 129465) - 
(129461 129462) - (129399 129399) - (129340 129342) - (129331 129337) - (129329 129330) - (129328 129328) - (129318 129318) - (129311 129311) - (129305 129310) - (129304 129304) - (129295 129295) - (129292 129292) - (128716 128716) - (128704 128704) - (128694 128694) - (128692 128693) - (128675 128675) - (128587 128591) - (128581 128583) - (128405 128406) - (128400 128400) - (128378 128378) - (128372 128373) - (128170 128170) - (128145 128145) - (128143 128143) - (128133 128135) - (128129 128131) - (128124 128124) - (128110 128120) - (128108 128109) - (128102 128107) - (128070 128080) - (128066 128067) - (127947 127948) - (127946 127946) - (127943 127943) - (127938 127940) - (127877 127877) - (9997 9997) - (9994 9996) - (9977 9977) - (9757 9757)) - (emoji-modifier (127995 127999)) - (emoji (129744 129750) - (129728 129730) - (129712 129718) - (129686 129704) - (129680 129685) - (129667 129670) - (129664 129666) - (129656 129658) - (129652 129652) - (129648 129651) - (129511 129535) - (129488 129510) - (129485 129487) - (129483 129483) - (129475 129482) - (129473 129474) - (129472 129472) - (129466 129471) - (129456 129465) - (129454 129455) - (129451 129453) - (129445 129450) - (129443 129444) - (129432 129442) - (129426 129431) - (129413 129425) - (129408 129412) - (129404 129407) - (129403 129403) - (129402 129402) - (129399 129400) - (129395 129398) - (129394 129394) - (129393 129393) - (129388 129392) - (129375 129387) - (129360 129374) - (129357 129359) - (129356 129356) - (129351 129355) - (129344 129349) - (129343 129343) - (129340 129342) - (129331 129338) - (129329 129330) - (129328 129328) - (129320 129327) - (129312 129319) - (129311 129311) - (129305 129310) - (129296 129304) - (129293 129295) - (129292 129292) - (128992 129003) - (128763 128764) - (128762 128762) - (128761 128761) - (128759 128760) - (128756 128758) - (128755 128755) - (128752 128752) - (128747 128748) - (128745 128745) - (128736 128741) - (128726 128727) - (128725 128725) - (128721 
128722) - (128720 128720) - (128717 128719) - (128716 128716) - (128715 128715) - (128705 128709) - (128704 128704) - (128703 128703) - (128697 128702) - (128695 128696) - (128694 128694) - (128691 128693) - (128690 128690) - (128686 128689) - (128679 128685) - (128678 128678) - (128676 128677) - (128675 128675) - (128674 128674) - (128667 128673) - (128665 128666) - (128664 128664) - (128663 128663) - (128662 128662) - (128661 128661) - (128660 128660) - (128657 128659) - (128656 128656) - (128655 128655) - (128654 128654) - (128653 128653) - (128652 128652) - (128650 128651) - (128649 128649) - (128648 128648) - (128647 128647) - (128646 128646) - (128643 128645) - (128641 128642) - (128640 128640) - (128581 128591) - (128577 128580) - (128567 128576) - (128566 128566) - (128565 128565) - (128564 128564) - (128560 128563) - (128558 128559) - (128557 128557) - (128556 128556) - (128552 128555) - (128550 128551) - (128544 128549) - (128543 128543) - (128540 128542) - (128539 128539) - (128538 128538) - (128537 128537) - (128536 128536) - (128535 128535) - (128534 128534) - (128533 128533) - (128530 128532) - (128529 128529) - (128528 128528) - (128527 128527) - (128526 128526) - (128521 128525) - (128519 128520) - (128513 128518) - (128512 128512) - (128507 128511) - (128506 128506) - (128499 128499) - (128495 128495) - (128488 128488) - (128483 128483) - (128481 128481) - (128476 128478) - (128465 128467) - (128450 128452) - (128444 128444) - (128433 128434) - (128424 128424) - (128421 128421) - (128420 128420) - (128405 128406) - (128400 128400) - (128394 128397) - (128391 128391) - (128378 128378) - (128371 128377) - (128367 128368) - (128348 128359) - (128336 128347) - (128331 128334) - (128329 128330) - (128302 128317) - (128300 128301) - (128278 128299) - (128277 128277) - (128266 128276) - (128265 128265) - (128264 128264) - (128260 128263) - (128259 128259) - (128255 128258) - (128253 128253) - (128249 128252) - (128248 128248) - (128246 128247) - (128245 
128245) - (128240 128244) - (128239 128239) - (128238 128238) - (128236 128237) - (128184 128235) - (128182 128183) - (128174 128181) - (128173 128173) - (128110 128172) - (128108 128109) - (128102 128107) - (128101 128101) - (128066 128100) - (128065 128065) - (128064 128064) - (128063 128063) - (128043 128062) - (128042 128042) - (128023 128041) - (128022 128022) - (128021 128021) - (128020 128020) - (128019 128019) - (128017 128018) - (128015 128016) - (128012 128014) - (128009 128011) - (128008 128008) - (127992 128007) - (127991 127991) - (127989 127989) - (127988 127988) - (127987 127987) - (127973 127984) - (127972 127972) - (127968 127971) - (127956 127967) - (127951 127955) - (127947 127950) - (127946 127946) - (127945 127945) - (127944 127944) - (127943 127943) - (127942 127942) - (127941 127941) - (127904 127940) - (127902 127903) - (127897 127899) - (127894 127895) - (127872 127891) - (127870 127871) - (127869 127869) - (127868 127868) - (127825 127867) - (127824 127824) - (127820 127823) - (127819 127819) - (127799 127818) - (127798 127798) - (127796 127797) - (127794 127795) - (127792 127793) - (127789 127791) - (127780 127788) - (127777 127777) - (127775 127776) - (127773 127774) - (127772 127772) - (127771 127771) - (127770 127770) - (127769 127769) - (127766 127768) - (127763 127765) - (127762 127762) - (127761 127761) - (127760 127760) - (127759 127759) - (127757 127758) - (127744 127756) - (127568 127569) - (127538 127546) - (127535 127535) - (127514 127514) - (127489 127490) - (127462 127487) - (127377 127386) - (127374 127374) - (127358 127359) - (127344 127345) - (127183 127183) - (126980 126980) - (12953 12953) - (12951 12951) - (12349 12349) - (12336 12336) - (11093 11093) - (11088 11088) - (11035 11036) - (11013 11015) - (10548 10549) - (10175 10175) - (10160 10160) - (10145 10145) - (10133 10135) - (10084 10084) - (10083 10083) - (10071 10071) - (10067 10069) - (10062 10062) - (10060 10060) - (10055 10055) - (10052 10052) - (10035 10036) - 
(10024 10024) - (10017 10017) - (10013 10013) - (10006 10006) - (10004 10004) - (10002 10002) - (9999 9999) - (9997 9997) - (9992 9996) - (9989 9989) - (9986 9986) - (9981 9981) - (9978 9978) - (9975 9977) - (9973 9973) - (9972 9972) - (9970 9971) - (9968 9969) - (9962 9962) - (9961 9961) - (9940 9940) - (9939 9939) - (9937 9937) - (9935 9935) - (9934 9934) - (9928 9928) - (9924 9925) - (9917 9918) - (9904 9905) - (9898 9899) - (9895 9895) - (9888 9889) - (9883 9884) - (9881 9881) - (9878 9879) - (9877 9877) - (9876 9876) - (9875 9875) - (9874 9874) - (9855 9855) - (9854 9854) - (9851 9851) - (9832 9832) - (9829 9830) - (9827 9827) - (9824 9824) - (9823 9823) - (9800 9811) - (9794 9794) - (9792 9792) - (9786 9786) - (9784 9785) - (9775 9775) - (9774 9774) - (9770 9770) - (9766 9766) - (9762 9763) - (9760 9760) - (9757 9757) - (9752 9752) - (9748 9749) - (9745 9745) - (9742 9742) - (9732 9732) - (9730 9731) - (9728 9729) - (9723 9726) - (9664 9664) - (9654 9654) - (9642 9643) - (9410 9410) - (9208 9210) - (9203 9203) - (9201 9202) - (9200 9200) - (9199 9199) - (9197 9198) - (9193 9196) - (9167 9167) - (9000 9000) - (8986 8987) - (8617 8618) - (8596 8601) - (8505 8505) - (8482 8482) - (8265 8265) - (8252 8252) - (174 174) - (169 169) - (48 57) - (42 42) - (35 35)) (emoji-component (917536 917631) (129456 129459) @@ -1214,7 +1213,8 @@ (8205 8205) (48 57) (42 42) - (35 35))))) + (35 35)) + (emoji-modifier (127995 127999))))) (define char-set:emoji (char-set)) (define char-set:emoji-presentation (char-set)) @@ -1232,12 +1232,13 @@ (list 'emoji-extended-pictographic char-set:emoji-extended-pictographic))) -(ranges->charset! emoji-ht 'emoji char-set:emoji) -(ranges->charset! emoji-ht 'emoji-presentation char-set:emoji-presentation) -(ranges->charset! emoji-ht 'emoji-modifier char-set:emoji-modifier) -(ranges->charset! emoji-ht 'emoji-modifier-base char-set:emoji-modifier-base) -(ranges->charset! emoji-ht 'emoji-component char-set:emoji-component) +(ranges->charset! 
hashtable 'emoji char-set:emoji) +(ranges->charset! hashtable 'emoji-presentation char-set:emoji-presentation) +(ranges->charset! hashtable 'emoji-modifier char-set:emoji-modifier) +(ranges->charset! hashtable 'emoji-modifier-base char-set:emoji-modifier-base) +(ranges->charset! hashtable 'emoji-component char-set:emoji-component) (ranges->charset! - emoji-ht + hashtable 'emoji-extended-pictographic char-set:emoji-extended-pictographic) + diff --git a/uniseg/graphemes.scm b/uniseg/graphemes.scm index f7d82b2..34dc6d4 100644 --- a/uniseg/graphemes.scm +++ b/uniseg/graphemes.scm @@ -24,411 +24,10 @@ char-set:grapheme-spacing-mark char-set:grapheme-zero-width-joiner grapheme-charsets)) -(define grapheme-ht +(define hashtable (alist->hashq-table - '((line-feed (10 10)) - (carriage-return (13 13)) - (zero-width-joiner (8205 8205)) - (hangul-syllable-lvt - (55177 55203) - (55149 55175) - (55121 55147) - (55093 55119) - (55065 55091) - (55037 55063) - (55009 55035) - (54981 55007) - (54953 54979) - (54925 54951) - (54897 54923) - (54869 54895) - (54841 54867) - (54813 54839) - (54785 54811) - (54757 54783) - (54729 54755) - (54701 54727) - (54673 54699) - (54645 54671) - (54617 54643) - (54589 54615) - (54561 54587) - (54533 54559) - (54505 54531) - (54477 54503) - (54449 54475) - (54421 54447) - (54393 54419) - (54365 54391) - (54337 54363) - (54309 54335) - (54281 54307) - (54253 54279) - (54225 54251) - (54197 54223) - (54169 54195) - (54141 54167) - (54113 54139) - (54085 54111) - (54057 54083) - (54029 54055) - (54001 54027) - (53973 53999) - (53945 53971) - (53917 53943) - (53889 53915) - (53861 53887) - (53833 53859) - (53805 53831) - (53777 53803) - (53749 53775) - (53721 53747) - (53693 53719) - (53665 53691) - (53637 53663) - (53609 53635) - (53581 53607) - (53553 53579) - (53525 53551) - (53497 53523) - (53469 53495) - (53441 53467) - (53413 53439) - (53385 53411) - (53357 53383) - (53329 53355) - (53301 53327) - (53273 53299) - (53245 53271) - (53217 53243) - 
(53189 53215) - (53161 53187) - (53133 53159) - (53105 53131) - (53077 53103) - (53049 53075) - (53021 53047) - (52993 53019) - (52965 52991) - (52937 52963) - (52909 52935) - (52881 52907) - (52853 52879) - (52825 52851) - (52797 52823) - (52769 52795) - (52741 52767) - (52713 52739) - (52685 52711) - (52657 52683) - (52629 52655) - (52601 52627) - (52573 52599) - (52545 52571) - (52517 52543) - (52489 52515) - (52461 52487) - (52433 52459) - (52405 52431) - (52377 52403) - (52349 52375) - (52321 52347) - (52293 52319) - (52265 52291) - (52237 52263) - (52209 52235) - (52181 52207) - (52153 52179) - (52125 52151) - (52097 52123) - (52069 52095) - (52041 52067) - (52013 52039) - (51985 52011) - (51957 51983) - (51929 51955) - (51901 51927) - (51873 51899) - (51845 51871) - (51817 51843) - (51789 51815) - (51761 51787) - (51733 51759) - (51705 51731) - (51677 51703) - (51649 51675) - (51621 51647) - (51593 51619) - (51565 51591) - (51537 51563) - (51509 51535) - (51481 51507) - (51453 51479) - (51425 51451) - (51397 51423) - (51369 51395) - (51341 51367) - (51313 51339) - (51285 51311) - (51257 51283) - (51229 51255) - (51201 51227) - (51173 51199) - (51145 51171) - (51117 51143) - (51089 51115) - (51061 51087) - (51033 51059) - (51005 51031) - (50977 51003) - (50949 50975) - (50921 50947) - (50893 50919) - (50865 50891) - (50837 50863) - (50809 50835) - (50781 50807) - (50753 50779) - (50725 50751) - (50697 50723) - (50669 50695) - (50641 50667) - (50613 50639) - (50585 50611) - (50557 50583) - (50529 50555) - (50501 50527) - (50473 50499) - (50445 50471) - (50417 50443) - (50389 50415) - (50361 50387) - (50333 50359) - (50305 50331) - (50277 50303) - (50249 50275) - (50221 50247) - (50193 50219) - (50165 50191) - (50137 50163) - (50109 50135) - (50081 50107) - (50053 50079) - (50025 50051) - (49997 50023) - (49969 49995) - (49941 49967) - (49913 49939) - (49885 49911) - (49857 49883) - (49829 49855) - (49801 49827) - (49773 49799) - (49745 49771) - (49717 49743) - 
(49689 49715) - (49661 49687) - (49633 49659) - (49605 49631) - (49577 49603) - (49549 49575) - (49521 49547) - (49493 49519) - (49465 49491) - (49437 49463) - (49409 49435) - (49381 49407) - (49353 49379) - (49325 49351) - (49297 49323) - (49269 49295) - (49241 49267) - (49213 49239) - (49185 49211) - (49157 49183) - (49129 49155) - (49101 49127) - (49073 49099) - (49045 49071) - (49017 49043) - (48989 49015) - (48961 48987) - (48933 48959) - (48905 48931) - (48877 48903) - (48849 48875) - (48821 48847) - (48793 48819) - (48765 48791) - (48737 48763) - (48709 48735) - (48681 48707) - (48653 48679) - (48625 48651) - (48597 48623) - (48569 48595) - (48541 48567) - (48513 48539) - (48485 48511) - (48457 48483) - (48429 48455) - (48401 48427) - (48373 48399) - (48345 48371) - (48317 48343) - (48289 48315) - (48261 48287) - (48233 48259) - (48205 48231) - (48177 48203) - (48149 48175) - (48121 48147) - (48093 48119) - (48065 48091) - (48037 48063) - (48009 48035) - (47981 48007) - (47953 47979) - (47925 47951) - (47897 47923) - (47869 47895) - (47841 47867) - (47813 47839) - (47785 47811) - (47757 47783) - (47729 47755) - (47701 47727) - (47673 47699) - (47645 47671) - (47617 47643) - (47589 47615) - (47561 47587) - (47533 47559) - (47505 47531) - (47477 47503) - (47449 47475) - (47421 47447) - (47393 47419) - (47365 47391) - (47337 47363) - (47309 47335) - (47281 47307) - (47253 47279) - (47225 47251) - (47197 47223) - (47169 47195) - (47141 47167) - (47113 47139) - (47085 47111) - (47057 47083) - (47029 47055) - (47001 47027) - (46973 46999) - (46945 46971) - (46917 46943) - (46889 46915) - (46861 46887) - (46833 46859) - (46805 46831) - (46777 46803) - (46749 46775) - (46721 46747) - (46693 46719) - (46665 46691) - (46637 46663) - (46609 46635) - (46581 46607) - (46553 46579) - (46525 46551) - (46497 46523) - (46469 46495) - (46441 46467) - (46413 46439) - (46385 46411) - (46357 46383) - (46329 46355) - (46301 46327) - (46273 46299) - (46245 46271) - (46217 46243) - 
(46189 46215) - (46161 46187) - (46133 46159) - (46105 46131) - (46077 46103) - (46049 46075) - (46021 46047) - (45993 46019) - (45965 45991) - (45937 45963) - (45909 45935) - (45881 45907) - (45853 45879) - (45825 45851) - (45797 45823) - (45769 45795) - (45741 45767) - (45713 45739) - (45685 45711) - (45657 45683) - (45629 45655) - (45601 45627) - (45573 45599) - (45545 45571) - (45517 45543) - (45489 45515) - (45461 45487) - (45433 45459) - (45405 45431) - (45377 45403) - (45349 45375) - (45321 45347) - (45293 45319) - (45265 45291) - (45237 45263) - (45209 45235) - (45181 45207) - (45153 45179) - (45125 45151) - (45097 45123) - (45069 45095) - (45041 45067) - (45013 45039) - (44985 45011) - (44957 44983) - (44929 44955) - (44901 44927) - (44873 44899) - (44845 44871) - (44817 44843) - (44789 44815) - (44761 44787) - (44733 44759) - (44705 44731) - (44677 44703) - (44649 44675) - (44621 44647) - (44593 44619) - (44565 44591) - (44537 44563) - (44509 44535) - (44481 44507) - (44453 44479) - (44425 44451) - (44397 44423) - (44369 44395) - (44341 44367) - (44313 44339) - (44285 44311) - (44257 44283) - (44229 44255) - (44201 44227) - (44173 44199) - (44145 44171) - (44117 44143) - (44089 44115) - (44061 44087) - (44033 44059)) + '((carriage-return (13 13)) + (regional-indicator (127462 127487)) (hangul-syllable-lv (55176 55176) (55148 55148) @@ -829,219 +428,7 @@ (44088 44088) (44060 44060) (44032 44032)) - (hangul-syllable-l (43360 43388) (4352 4447)) (hangul-syllable-v (55216 55238) (4448 4519)) - (control - (918000 921599) - (917632 917759) - (917506 917535) - (917505 917505) - (917504 917504) - (119155 119162) - (113824 113827) - (78896 78911) - (65529 65531) - (65520 65528) - (65279 65279) - (8294 8303) - (8293 8293) - (8288 8292) - (8234 8238) - (8233 8233) - (8232 8232) - (8206 8207) - (8203 8203) - (6158 6158) - (1564 1564) - (173 173) - (127 159) - (14 31) - (11 12) - (0 9)) - (regional-indicator (127462 127487)) - (hangul-syllable-t (55243 55291) (4520 
4607)) - (prepend - (73474 73474) - (73030 73030) - (72324 72329) - (72250 72250) - (72001 72001) - (71999 71999) - (70082 70083) - (69837 69837) - (69821 69821) - (3406 3406) - (2274 2274) - (2192 2193) - (1807 1807) - (1757 1757) - (1536 1541)) - (spacing-mark - (119149 119149) - (119142 119142) - (94192 94193) - (94033 94087) - (73537 73537) - (73534 73535) - (73524 73525) - (73475 73475) - (73461 73462) - (73110 73110) - (73107 73108) - (73098 73102) - (72884 72884) - (72881 72881) - (72873 72873) - (72766 72766) - (72751 72751) - (72343 72343) - (72279 72280) - (72249 72249) - (72164 72164) - (72156 72159) - (72145 72147) - (72002 72002) - (72000 72000) - (71997 71997) - (71991 71992) - (71985 71989) - (71736 71736) - (71724 71726) - (71462 71462) - (71350 71350) - (71342 71343) - (71340 71340) - (71230 71230) - (71227 71228) - (71216 71218) - (71102 71102) - (71096 71099) - (71088 71089) - (70849 70849) - (70846 70846) - (70843 70844) - (70841 70841) - (70833 70834) - (70725 70725) - (70720 70721) - (70709 70711) - (70498 70499) - (70475 70477) - (70471 70472) - (70465 70468) - (70463 70463) - (70402 70403) - (70368 70370) - (70197 70197) - (70194 70195) - (70188 70190) - (70094 70094) - (70079 70080) - (70067 70069) - (70018 70018) - (69957 69958) - (69932 69932) - (69815 69816) - (69808 69810) - (69762 69762) - (69634 69634) - (69632 69632) - (44012 44012) - (44009 44010) - (44006 44007) - (44003 44004) - (43765 43765) - (43758 43759) - (43755 43755) - (43597 43597) - (43571 43572) - (43567 43568) - (43454 43456) - (43450 43451) - (43444 43445) - (43395 43395) - (43346 43347) - (43188 43203) - (43136 43137) - (43047 43047) - (43043 43044) - (7415 7415) - (7393 7393) - (7220 7221) - (7204 7211) - (7154 7155) - (7150 7150) - (7146 7148) - (7143 7143) - (7082 7082) - (7078 7079) - (7073 7073) - (7042 7042) - (6979 6980) - (6973 6977) - (6971 6971) - (6916 6916) - (6765 6770) - (6743 6743) - (6741 6741) - (6681 6682) - (6451 6456) - (6448 6449) - (6441 6443) - 
(6435 6438) - (6087 6088) - (6078 6085) - (6070 6070) - (5940 5940) - (5909 5909) - (4228 4228) - (4182 4183) - (4155 4156) - (4145 4145) - (3967 3967) - (3902 3903) - (3763 3763) - (3635 3635) - (3570 3571) - (3544 3550) - (3536 3537) - (3458 3459) - (3402 3404) - (3398 3400) - (3391 3392) - (3330 3331) - (3315 3315) - (3274 3275) - (3271 3272) - (3267 3268) - (3264 3265) - (3262 3262) - (3202 3203) - (3137 3140) - (3073 3075) - (3018 3020) - (3014 3016) - (3009 3010) - (3007 3007) - (2891 2892) - (2887 2888) - (2880 2880) - (2818 2819) - (2763 2764) - (2761 2761) - (2750 2752) - (2691 2691) - (2622 2624) - (2563 2563) - (2507 2508) - (2503 2504) - (2495 2496) - (2434 2435) - (2382 2383) - (2377 2380) - (2366 2368) - (2363 2363) - (2307 2307)) (extend (917760 917999) (917536 917631) @@ -1419,7 +806,620 @@ (1425 1469) (1160 1161) (1155 1159) - (768 879))))) + (768 879)) + (line-feed (10 10)) + (zero-width-joiner (8205 8205)) + (control + (918000 921599) + (917632 917759) + (917506 917535) + (917505 917505) + (917504 917504) + (119155 119162) + (113824 113827) + (78896 78911) + (65529 65531) + (65520 65528) + (65279 65279) + (8294 8303) + (8293 8293) + (8288 8292) + (8234 8238) + (8233 8233) + (8232 8232) + (8206 8207) + (8203 8203) + (6158 6158) + (1564 1564) + (173 173) + (127 159) + (14 31) + (11 12) + (0 9)) + (hangul-syllable-lvt + (55177 55203) + (55149 55175) + (55121 55147) + (55093 55119) + (55065 55091) + (55037 55063) + (55009 55035) + (54981 55007) + (54953 54979) + (54925 54951) + (54897 54923) + (54869 54895) + (54841 54867) + (54813 54839) + (54785 54811) + (54757 54783) + (54729 54755) + (54701 54727) + (54673 54699) + (54645 54671) + (54617 54643) + (54589 54615) + (54561 54587) + (54533 54559) + (54505 54531) + (54477 54503) + (54449 54475) + (54421 54447) + (54393 54419) + (54365 54391) + (54337 54363) + (54309 54335) + (54281 54307) + (54253 54279) + (54225 54251) + (54197 54223) + (54169 54195) + (54141 54167) + (54113 54139) + (54085 54111) + 
(54057 54083) + (54029 54055) + (54001 54027) + (53973 53999) + (53945 53971) + (53917 53943) + (53889 53915) + (53861 53887) + (53833 53859) + (53805 53831) + (53777 53803) + (53749 53775) + (53721 53747) + (53693 53719) + (53665 53691) + (53637 53663) + (53609 53635) + (53581 53607) + (53553 53579) + (53525 53551) + (53497 53523) + (53469 53495) + (53441 53467) + (53413 53439) + (53385 53411) + (53357 53383) + (53329 53355) + (53301 53327) + (53273 53299) + (53245 53271) + (53217 53243) + (53189 53215) + (53161 53187) + (53133 53159) + (53105 53131) + (53077 53103) + (53049 53075) + (53021 53047) + (52993 53019) + (52965 52991) + (52937 52963) + (52909 52935) + (52881 52907) + (52853 52879) + (52825 52851) + (52797 52823) + (52769 52795) + (52741 52767) + (52713 52739) + (52685 52711) + (52657 52683) + (52629 52655) + (52601 52627) + (52573 52599) + (52545 52571) + (52517 52543) + (52489 52515) + (52461 52487) + (52433 52459) + (52405 52431) + (52377 52403) + (52349 52375) + (52321 52347) + (52293 52319) + (52265 52291) + (52237 52263) + (52209 52235) + (52181 52207) + (52153 52179) + (52125 52151) + (52097 52123) + (52069 52095) + (52041 52067) + (52013 52039) + (51985 52011) + (51957 51983) + (51929 51955) + (51901 51927) + (51873 51899) + (51845 51871) + (51817 51843) + (51789 51815) + (51761 51787) + (51733 51759) + (51705 51731) + (51677 51703) + (51649 51675) + (51621 51647) + (51593 51619) + (51565 51591) + (51537 51563) + (51509 51535) + (51481 51507) + (51453 51479) + (51425 51451) + (51397 51423) + (51369 51395) + (51341 51367) + (51313 51339) + (51285 51311) + (51257 51283) + (51229 51255) + (51201 51227) + (51173 51199) + (51145 51171) + (51117 51143) + (51089 51115) + (51061 51087) + (51033 51059) + (51005 51031) + (50977 51003) + (50949 50975) + (50921 50947) + (50893 50919) + (50865 50891) + (50837 50863) + (50809 50835) + (50781 50807) + (50753 50779) + (50725 50751) + (50697 50723) + (50669 50695) + (50641 50667) + (50613 50639) + (50585 50611) + 
(50557 50583) + (50529 50555) + (50501 50527) + (50473 50499) + (50445 50471) + (50417 50443) + (50389 50415) + (50361 50387) + (50333 50359) + (50305 50331) + (50277 50303) + (50249 50275) + (50221 50247) + (50193 50219) + (50165 50191) + (50137 50163) + (50109 50135) + (50081 50107) + (50053 50079) + (50025 50051) + (49997 50023) + (49969 49995) + (49941 49967) + (49913 49939) + (49885 49911) + (49857 49883) + (49829 49855) + (49801 49827) + (49773 49799) + (49745 49771) + (49717 49743) + (49689 49715) + (49661 49687) + (49633 49659) + (49605 49631) + (49577 49603) + (49549 49575) + (49521 49547) + (49493 49519) + (49465 49491) + (49437 49463) + (49409 49435) + (49381 49407) + (49353 49379) + (49325 49351) + (49297 49323) + (49269 49295) + (49241 49267) + (49213 49239) + (49185 49211) + (49157 49183) + (49129 49155) + (49101 49127) + (49073 49099) + (49045 49071) + (49017 49043) + (48989 49015) + (48961 48987) + (48933 48959) + (48905 48931) + (48877 48903) + (48849 48875) + (48821 48847) + (48793 48819) + (48765 48791) + (48737 48763) + (48709 48735) + (48681 48707) + (48653 48679) + (48625 48651) + (48597 48623) + (48569 48595) + (48541 48567) + (48513 48539) + (48485 48511) + (48457 48483) + (48429 48455) + (48401 48427) + (48373 48399) + (48345 48371) + (48317 48343) + (48289 48315) + (48261 48287) + (48233 48259) + (48205 48231) + (48177 48203) + (48149 48175) + (48121 48147) + (48093 48119) + (48065 48091) + (48037 48063) + (48009 48035) + (47981 48007) + (47953 47979) + (47925 47951) + (47897 47923) + (47869 47895) + (47841 47867) + (47813 47839) + (47785 47811) + (47757 47783) + (47729 47755) + (47701 47727) + (47673 47699) + (47645 47671) + (47617 47643) + (47589 47615) + (47561 47587) + (47533 47559) + (47505 47531) + (47477 47503) + (47449 47475) + (47421 47447) + (47393 47419) + (47365 47391) + (47337 47363) + (47309 47335) + (47281 47307) + (47253 47279) + (47225 47251) + (47197 47223) + (47169 47195) + (47141 47167) + (47113 47139) + (47085 47111) + 
(47057 47083) + (47029 47055) + (47001 47027) + (46973 46999) + (46945 46971) + (46917 46943) + (46889 46915) + (46861 46887) + (46833 46859) + (46805 46831) + (46777 46803) + (46749 46775) + (46721 46747) + (46693 46719) + (46665 46691) + (46637 46663) + (46609 46635) + (46581 46607) + (46553 46579) + (46525 46551) + (46497 46523) + (46469 46495) + (46441 46467) + (46413 46439) + (46385 46411) + (46357 46383) + (46329 46355) + (46301 46327) + (46273 46299) + (46245 46271) + (46217 46243) + (46189 46215) + (46161 46187) + (46133 46159) + (46105 46131) + (46077 46103) + (46049 46075) + (46021 46047) + (45993 46019) + (45965 45991) + (45937 45963) + (45909 45935) + (45881 45907) + (45853 45879) + (45825 45851) + (45797 45823) + (45769 45795) + (45741 45767) + (45713 45739) + (45685 45711) + (45657 45683) + (45629 45655) + (45601 45627) + (45573 45599) + (45545 45571) + (45517 45543) + (45489 45515) + (45461 45487) + (45433 45459) + (45405 45431) + (45377 45403) + (45349 45375) + (45321 45347) + (45293 45319) + (45265 45291) + (45237 45263) + (45209 45235) + (45181 45207) + (45153 45179) + (45125 45151) + (45097 45123) + (45069 45095) + (45041 45067) + (45013 45039) + (44985 45011) + (44957 44983) + (44929 44955) + (44901 44927) + (44873 44899) + (44845 44871) + (44817 44843) + (44789 44815) + (44761 44787) + (44733 44759) + (44705 44731) + (44677 44703) + (44649 44675) + (44621 44647) + (44593 44619) + (44565 44591) + (44537 44563) + (44509 44535) + (44481 44507) + (44453 44479) + (44425 44451) + (44397 44423) + (44369 44395) + (44341 44367) + (44313 44339) + (44285 44311) + (44257 44283) + (44229 44255) + (44201 44227) + (44173 44199) + (44145 44171) + (44117 44143) + (44089 44115) + (44061 44087) + (44033 44059)) + (hangul-syllable-t (55243 55291) (4520 4607)) + (prepend + (73474 73474) + (73030 73030) + (72324 72329) + (72250 72250) + (72001 72001) + (71999 71999) + (70082 70083) + (69837 69837) + (69821 69821) + (3406 3406) + (2274 2274) + (2192 2193) + (1807 
1807) + (1757 1757) + (1536 1541)) + (hangul-syllable-l (43360 43388) (4352 4447)) + (spacing-mark + (119149 119149) + (119142 119142) + (94192 94193) + (94033 94087) + (73537 73537) + (73534 73535) + (73524 73525) + (73475 73475) + (73461 73462) + (73110 73110) + (73107 73108) + (73098 73102) + (72884 72884) + (72881 72881) + (72873 72873) + (72766 72766) + (72751 72751) + (72343 72343) + (72279 72280) + (72249 72249) + (72164 72164) + (72156 72159) + (72145 72147) + (72002 72002) + (72000 72000) + (71997 71997) + (71991 71992) + (71985 71989) + (71736 71736) + (71724 71726) + (71462 71462) + (71350 71350) + (71342 71343) + (71340 71340) + (71230 71230) + (71227 71228) + (71216 71218) + (71102 71102) + (71096 71099) + (71088 71089) + (70849 70849) + (70846 70846) + (70843 70844) + (70841 70841) + (70833 70834) + (70725 70725) + (70720 70721) + (70709 70711) + (70498 70499) + (70475 70477) + (70471 70472) + (70465 70468) + (70463 70463) + (70402 70403) + (70368 70370) + (70197 70197) + (70194 70195) + (70188 70190) + (70094 70094) + (70079 70080) + (70067 70069) + (70018 70018) + (69957 69958) + (69932 69932) + (69815 69816) + (69808 69810) + (69762 69762) + (69634 69634) + (69632 69632) + (44012 44012) + (44009 44010) + (44006 44007) + (44003 44004) + (43765 43765) + (43758 43759) + (43755 43755) + (43597 43597) + (43571 43572) + (43567 43568) + (43454 43456) + (43450 43451) + (43444 43445) + (43395 43395) + (43346 43347) + (43188 43203) + (43136 43137) + (43047 43047) + (43043 43044) + (7415 7415) + (7393 7393) + (7220 7221) + (7204 7211) + (7154 7155) + (7150 7150) + (7146 7148) + (7143 7143) + (7082 7082) + (7078 7079) + (7073 7073) + (7042 7042) + (6979 6980) + (6973 6977) + (6971 6971) + (6916 6916) + (6765 6770) + (6743 6743) + (6741 6741) + (6681 6682) + (6451 6456) + (6448 6449) + (6441 6443) + (6435 6438) + (6087 6088) + (6078 6085) + (6070 6070) + (5940 5940) + (5909 5909) + (4228 4228) + (4182 4183) + (4155 4156) + (4145 4145) + (3967 3967) + (3902 
3903) + (3763 3763) + (3635 3635) + (3570 3571) + (3544 3550) + (3536 3537) + (3458 3459) + (3402 3404) + (3398 3400) + (3391 3392) + (3330 3331) + (3315 3315) + (3274 3275) + (3271 3272) + (3267 3268) + (3264 3265) + (3262 3262) + (3202 3203) + (3137 3140) + (3073 3075) + (3018 3020) + (3014 3016) + (3009 3010) + (3007 3007) + (2891 2892) + (2887 2888) + (2880 2880) + (2818 2819) + (2763 2764) + (2761 2761) + (2750 2752) + (2691 2691) + (2622 2624) + (2563 2563) + (2507 2508) + (2503 2504) + (2495 2496) + (2434 2435) + (2382 2383) + (2377 2380) + (2366 2368) + (2363 2363) + (2307 2307))))) (define char-set:grapheme-hangul-syllable-l (char-set)) (define char-set:grapheme-hangul-syllable-v (char-set)) @@ -1446,39 +1446,39 @@ (list 'extend char-set:grapheme-extend) (list 'regional-indicator char-set:grapheme-regional-indicator) (list 'spacing-mark char-set:grapheme-spacing-mark) - (list 'zero-width-joiner char-set:grapheme-zero-width-joiner) - (list 'extended-pictographic char-set:emoji-extended-pictographic))) + (list 'zero-width-joiner char-set:grapheme-zero-width-joiner))) (ranges->charset! - grapheme-ht + hashtable 'hangul-syllable-l char-set:grapheme-hangul-syllable-l) (ranges->charset! - grapheme-ht + hashtable 'hangul-syllable-v char-set:grapheme-hangul-syllable-v) (ranges->charset! - grapheme-ht + hashtable 'hangul-syllable-lv char-set:grapheme-hangul-syllable-lv) (ranges->charset! - grapheme-ht + hashtable 'hangul-syllable-lvt char-set:grapheme-hangul-syllable-lvt) -(ranges->charset! grapheme-ht 'prepend char-set:grapheme-prepend) +(ranges->charset! hashtable 'prepend char-set:grapheme-prepend) +(ranges->charset! hashtable 'carriage-return char-set:grapheme-carriage-return) +(ranges->charset! hashtable 'line-feed char-set:grapheme-line-feed) +(ranges->charset! hashtable 'control char-set:grapheme-control) +(ranges->charset! hashtable 'extend char-set:grapheme-extend) (ranges->charset! 
- grapheme-ht - 'carriage-return - char-set:grapheme-carriage-return) -(ranges->charset! grapheme-ht 'line-feed char-set:grapheme-line-feed) -(ranges->charset! grapheme-ht 'control char-set:grapheme-control) -(ranges->charset! grapheme-ht 'extend char-set:grapheme-extend) -(ranges->charset! - grapheme-ht + hashtable 'regional-indicator char-set:grapheme-regional-indicator) -(ranges->charset! grapheme-ht 'spacing-mark char-set:grapheme-spacing-mark) +(ranges->charset! hashtable 'spacing-mark char-set:grapheme-spacing-mark) (ranges->charset! - grapheme-ht + hashtable 'zero-width-joiner char-set:grapheme-zero-width-joiner) + +(set! grapheme-charsets + (cons (list 'extended-pictographic char-set:emoji-extended-pictographic) + grapheme-charsets)) diff --git a/uniseg/graphemes/stream.scm b/uniseg/graphemes/stream.scm index 7bc64ea..6a9a4eb 100644 --- a/uniseg/graphemes/stream.scm +++ b/uniseg/graphemes/stream.scm @@ -125,6 +125,7 @@ ;; Grapheme boundary #11n (emoji!) ((_ 'extended-pictographic) + (pk "EXTENDED") (values 'extended-pictographic #t)) ;; Grapheme boundaries #12n and #13n @@ -148,7 +149,7 @@ state) (begin (let* ((width property (char-width glyph)) - (next-state boundary? (state-machine state property))) + (next-state boundary? (cpk-values glyph state property '= (state-machine state property)))) (if boundary? 
state diff --git a/uniseg/internal.scm b/uniseg/internal.scm index a4aaa95..4292ab3 100644 --- a/uniseg/internal.scm +++ b/uniseg/internal.scm @@ -1,19 +1,18 @@ (define-module (uniseg internal) #:use-module (ice-9 peg) + #:use-module (ice-9 match) #:use-module (ice-9 textual-ports) #:use-module (ice-9 exceptions) + #:use-module (ice-9 hash-table) #:use-module (ice-9 i18n) + #:use-module (ice-9 format) #:use-module (ice-9 pretty-print) #:use-module (web uri) #:use-module (web client) #:use-module (web request) #:use-module (srfi srfi-1) #:use-module (srfi srfi-71) - #:export (@hex - @codepoint - @codepoint-range - @comment - @ws + #:export (@line cpk-values cpk cons-hash-list! @@ -21,7 +20,8 @@ format-exception-msg in-surrogate-range cmdline-wget-or-file - ranges->charset!)) + ranges->charset! + make-line-processor)) ;; ;; Common PEG patterns @@ -42,9 +42,99 @@ (define-peg-pattern @ws none (or " " "\t")) +(define-peg-pattern @property all + (* (peg "[a-zA-Z_]"))) + +(define-peg-pattern @datum body + (and @codepoint-range (* @ws) (ignore ";") (* @ws) @property)) + +(define-peg-pattern @line body + (and @datum (* @ws) @comment)) + + +;; Giant unicode code generation procedure creator; returns two procedures, process-line and output-boilerplate +(define (make-line-processor + hashtable + string->property + properties + symbols + charsets-symbol + stdout) + + (define properties-and-symbols + (zip properties symbols)) + + (define (process-line line) + (define tree (peg:tree (match-pattern @line line))) + + (unless + (or (not tree) + (null? tree) + (eq? 
'@comment (car tree))) + + (match tree + (((('@codepoint-range + ('@codepoint codepoints) ___) + ('@property prop-str)) + ('@comment comment)) + + (with-exception-handler + (λ (err) + (format stdout "Skipping line due to error :: ") + (format-exception-msg stdout err)) + (λ () + (let ((f (hex-string->integer (first codepoints))) + (l (hex-string->integer (last codepoints))) + (property (string->property prop-str comment))) + (when (or (in-surrogate-range f) + (in-surrogate-range l)) + (error (format #f "chars in surrogate range ~x -> ~x" f l))) + + (cons-hash-list! hashtable property f l))) + #:unwind? #t))))) + + (define (output-boilerplate) + (pretty-print + `(define hashtable + (alist->hashq-table ',(hash-map->list cons hashtable)))) + + (display "\n") + + (for-each + (λ (sym) + (pretty-print + `(define ,sym (char-set)))) + symbols) + + (display "\n") + + (pretty-print + `(define ,charsets-symbol + (list + ,@(map + (λ (pair) + (let ((f (first pair)) + (s (second pair))) + `(list ',f ,s))) + properties-and-symbols)))) + + (display "\n") + + (for-each + (λ (set-pair) + (let ((name (first set-pair)) + (symbol (second set-pair))) + (pretty-print + `(ranges->charset! hashtable ',name ,symbol)))) + properties-and-symbols) + + (display "\n")) + + (values process-line output-boilerplate)) + ;; Helper macro to add a list of character ranges ;; to a hash-set. -(define-syntax-rule (cons-hash-list! ht key low high) +(define (cons-hash-list! ht key low high) (let* ((old (hashq-ref ht key)) (value (list low high)) (new-lst