diff --git a/HACKING b/HACKING index bc8d5c1..b52ad78 100644 --- a/HACKING +++ b/HACKING @@ -1,10 +1,10 @@ # -*- mode: org; coding: utf-8; -*- -#+TITLE: Hacking guile-runewidth +#+TITLE: Hacking guile-uniseg * Contributing -By far the easiest way to hack on guile-runewidth is to develop using Guix: +By far the easiest way to hack on guile-uniseg is to develop using Guix: #+BEGIN_SRC bash # Obtain the source code diff --git a/README.org b/README.org index 4a34901..f688305 100644 --- a/README.org +++ b/README.org @@ -1,14 +1,14 @@ # -*- mode: org; coding: utf-8; -*- -#+TITLE: README for Guile Runewidth +#+TITLE: README for Guile Uniseg -~guile-runewidth~ is a simple Guile library that provides several new Guile character sets for categorizing +~guile-uniseg~ is a simple Guile library that provides several new Guile character sets for categorizing East Asian characters and emoji. This can be useful when determining the width of characters. -This library is heavily inspired by the golang library ~runewidth~. +This library is heavily inspired by the golang libraries ~runewidth~ and ~uniseg~. * Scripts There are two scripts which can be used to regenerate the character set files. These are optional to run, since -the script's output is stored in the repository at ~runewidth/eastasian.scm~ and ~runewidth/emoji.scm~. +the script's output is stored in the repository at ~uniseg/eastasian.scm~ and ~uniseg/emoji.scm~. However, should the source change as the unicode spec changes, these can be refreshed. diff --git a/doc/guile-runewidth.texi b/doc/guile-uniseg.texi similarity index 98% rename from doc/guile-runewidth.texi rename to doc/guile-uniseg.texi index 0f3542f..f0a8dd4 100644 --- a/doc/guile-runewidth.texi +++ b/doc/guile-uniseg.texi @@ -27,7 +27,7 @@ Documentation License''. 
@titlepage @title The Reflow Manual -@author +@author Vivianne Langdon @page @vskip 0pt plus 1filll diff --git a/guix.scm b/guix.scm index b2406e8..0cd3f87 100644 --- a/guix.scm +++ b/guix.scm @@ -13,7 +13,7 @@ (srfi srfi-1)) (package - (name "guile-runewidth") + (name "guile-uniseg") (version "0.1") (source (local-file @@ -92,6 +92,6 @@ (description "A library that provides guile character sets and operations to work with runes that are more than a single character width.") (home-page - "https://git.solarpunk.moe/vv/guile-runewidth") + "https://git.solarpunk.moe/vv/guile-uniseg") (license license:gpl3+)) diff --git a/hall.scm b/hall.scm index ce1b7e5..ac8a64b 100644 --- a/hall.scm +++ b/hall.scm @@ -1,5 +1,5 @@ (hall-description - (name "runewidth") + (name "uniseg") (prefix "guile") (version "0.1") (author "Vivanne Langdon") @@ -9,7 +9,7 @@ (description "A library that provides guile character sets and operations to work with runes that are more than a single character width.") (home-page - "https://git.solarpunk.moe/vv/guile-runewidth") + "https://git.solarpunk.moe/vv/guile-uniseg") (license gpl3+) (dependencies `()) (skip ()) @@ -19,12 +19,13 @@ (native-language-support #f) (licensing #f))) (files (libraries - ((scheme-file "runewidth") + ((scheme-file "uniseg") (directory - "runewidth" + "uniseg" ((scheme-file "emoji") (directory "eastasian" ((scheme-file "locale"))) (scheme-file "eastasian") + (directory "graphemes" ((scheme-file "stream"))) (scheme-file "graphemes") (scheme-file "internal"))))) (tests ((directory @@ -41,7 +42,7 @@ (symlink "README" "README.org") (text-file "HACKING") (text-file "COPYING") - (directory "doc" ((texi-file "guile-runewidth"))))) + (directory "doc" ((texi-file "guile-uniseg"))))) (infrastructure ((scheme-file "guix") (text-file ".gitignore") diff --git a/runewidth.scm b/runewidth.scm deleted file mode 100644 index e69de29..0000000 diff --git a/scripts/generate-east-asian.in b/scripts/generate-east-asian.in index 324edd1..ff0f01d 
100644 --- a/scripts/generate-east-asian.in +++ b/scripts/generate-east-asian.in @@ -5,7 +5,7 @@ ;; Can be called with a trailing argument pointing to the file on disk. (use-modules - (runewidth internal) + (uniseg internal) (ice-9 pretty-print) (ice-9 peg) (ice-9 format) @@ -97,7 +97,7 @@ (λ () (wget-to-lines east-asian-url stdout)))) -(define file "runewidth/eastasian.scm") +(define file "uniseg/eastasian.scm") (format stdout "Writing to ~a...\n" file) @@ -108,12 +108,13 @@ (for-each process-east-asian-line (line-func)) (pretty-print - `(define-module (runewidth eastasian) + `(define-module (uniseg eastasian) #:use-module (ice-9 hash-table) #:use-module (srfi srfi-1) - #:use-module (runewidth internal) + #:use-module (uniseg internal) #:export - ,ea-symbol-names)) + (,@ea-symbol-names + eastasian-charsets))) (pretty-print `(define eastasian-ht @@ -129,6 +130,18 @@ (display "\n") + (pretty-print + `(define eastasian-charsets + (list + ,@(map + (λ (pair) + (let ((f (first pair)) + (s (second pair))) + `(list ',f ,s))) + ea-sets-and-symbols)))) + + (display "\n") + (for-each (λ (set-pair) (let ((name (first set-pair)) diff --git a/scripts/generate-emoji.in b/scripts/generate-emoji.in index 059bf63..6d255b7 100644 --- a/scripts/generate-emoji.in +++ b/scripts/generate-emoji.in @@ -5,7 +5,7 @@ ;; Can be called with a trailing argument pointing to the file on disk. 
(use-modules - (runewidth internal) + (uniseg internal) (ice-9 pretty-print) (ice-9 peg) (ice-9 format) @@ -66,7 +66,7 @@ (λ () (wget-to-lines emoji-url stdout)))) -(define file "runewidth/emoji.scm") +(define file "uniseg/emoji.scm") (format stdout "Writing to ~a...\n" file) @@ -77,9 +77,9 @@ (for-each process-emoji-line (line-func)) (pretty-print - `(define-module (runewidth emoji) + `(define-module (uniseg emoji) #:use-module (srfi srfi-1) - #:export (char-set:emoji))) + #:export (char-set:extended-pictographic))) (pretty-print `(define emoji-list ',emoji-list)) @@ -87,7 +87,7 @@ (display "\n") (pretty-print - `(define char-set:emoji (char-set))) + `(define char-set:extended-pictographic (char-set))) (display "\n") @@ -97,7 +97,7 @@ (ucs-range->char-set! (first pair) (+ 1 (second pair)) - #t char-set:emoji)) + #t char-set:extended-pictographic)) emoji-list)) (display "Code generation complete.\n" stdout))) diff --git a/scripts/generate-graphemes.in b/scripts/generate-graphemes.in index d42f72e..18b75ca 100644 --- a/scripts/generate-graphemes.in +++ b/scripts/generate-graphemes.in @@ -5,7 +5,7 @@ ;; Can be called with a trailing argument pointing to the file on disk. 
(use-modules - (runewidth internal) + (uniseg internal) (ice-9 pretty-print) (ice-9 peg) (ice-9 format) @@ -108,7 +108,7 @@ (λ () (wget-to-lines grapheme-url stdout)))) -(define file "runewidth/graphemes.scm") +(define file "uniseg/graphemes.scm") (format stdout "Writing to ~a...\n" file) @@ -119,11 +119,12 @@ (for-each process-grapheme-line (line-func)) (pretty-print - `(define-module (runewidth graphemes) + `(define-module (uniseg graphemes) #:use-module (ice-9 hash-table) #:use-module (srfi srfi-1) - #:use-module (runewidth internal) - #:export ,grapheme-symbol-names)) + #:use-module (uniseg internal) + #:export ( ,@grapheme-symbol-names + grapheme-charsets))) (pretty-print @@ -132,6 +133,26 @@ (display "\n") + (for-each + (λ (sym) + (pretty-print + `(define ,sym (char-set)))) + grapheme-symbol-names) + + (display "\n") + + (pretty-print + `(define grapheme-charsets + (list + ,@(map + (λ (pair) + (let ((f (first pair)) + (s (second pair))) + `(list ',f ,s))) + grapheme-sets-and-symbols)))) + + (display "\n") + (for-each (λ (set-pair) (let ((name (first set-pair)) diff --git a/tests/test-eastasian-locale.scm b/tests/test-eastasian-locale.scm index 0872005..c3baa88 100644 --- a/tests/test-eastasian-locale.scm +++ b/tests/test-eastasian-locale.scm @@ -1,5 +1,5 @@ (define-module (tests test-eastasian-locale) - #:use-module (runewidth eastasian locale) + #:use-module (uniseg eastasian locale) #:use-module (srfi srfi-64)) (test-begin "test-eastasian-locale") diff --git a/uniseg.scm b/uniseg.scm new file mode 100644 index 0000000..002acf7 --- /dev/null +++ b/uniseg.scm @@ -0,0 +1,60 @@ +(define-module (uniseg) + #:use-module (srfi srfi-1) + #:use-module (ice-9 match) + #:use-module (uniseg emoji) + #:use-module (uniseg graphemes) + #:use-module (uniseg eastasian) + #:export (emoji? + grapheme-property + eastasian-property)) + +(define (emoji? char) + (char-set-contains? 
char-set:extended-pictographic char)) + +(define (get-prop sets char) + (let ((pair + (find + (λ (p) (char-set-contains? (second p) char)) + sets))) + (and pair + (first pair)))) + +(define (grapheme-property char) + "Find the unicode grapheme cluster property, as defined by https://www.unicode.org/reports/tr44/" + (or (get-prop grapheme-charsets char) + 'other)) + + +(define (eastasian-property char) + (define (<==> lo hi) + "Create a range predicate, inclusive between two numbers" + (λ (num) + (and (>= num lo) + (<= num hi)))) + + (define range-unified-ideographs-ext-a? + (<==> #x3400 #x4DBF)) + (define range-unified-ideographs? + (<==> #x4E00 #x9FFF)) + (define range-compatibility-ideographs? + (<==> #xF900 #xFAFF)) + (define range-plane-2? + (<==> #x20000 #x2FFFD)) + (define range-plane-3? + (<==> #x30000 #x3FFFD)) + + ;; The spec defines any values not in the table be wide, if + ;; they are in certain ranges. So let's take care of that here. + ;; These characters are not in the char-sets as they are undesignated. + ;; See comments in unicode's EastAsianWidth.txt + (or (get-prop eastasian-charsets char) + (let ((int (char->integer char))) + (match int + ((or (? range-unified-ideographs-ext-a?) + (? range-unified-ideographs?) + (? range-compatibility-ideographs?) + (? range-plane-2?) + (? range-plane-3?)) + 'doublewidth) + (else + 'neutral))))) diff --git a/runewidth/eastasian.scm b/uniseg/eastasian.scm similarity index 98% rename from runewidth/eastasian.scm rename to uniseg/eastasian.scm index d1e2cb5..01518a1 100644 --- a/runewidth/eastasian.scm +++ b/uniseg/eastasian.scm @@ -1,76 +1,24 @@ ;; Code generated by scripts/generate-east-asian. 
DO NOT EDIT (define-module - (runewidth eastasian) + (uniseg eastasian) #:use-module (ice-9 hash-table) #:use-module (srfi srfi-1) #:use-module - (runewidth internal) + (uniseg internal) #:export (char-set:eastasian-combining char-set:eastasian-doublewidth char-set:eastasian-halfwidth char-set:eastasian-narrow char-set:eastasian-neutral - char-set:eastasian-ambiguous)) + char-set:eastasian-ambiguous + eastasian-charsets)) (define eastasian-ht (alist->hashq-table - '((combining - (125136 125142) - (122918 122922) - (122915 122916) - (122907 122913) - (122888 122904) - (122880 122886) - (119362 119364) - (119210 119213) - (119173 119179) - (119163 119170) - (119149 119154) - (119143 119145) - (119141 119142) - (92912 92916) - (70512 70516) - (70502 70508) - (70459 70460) - (70400 70401) - (69446 69456) - (69291 69292) - (66422 66426) - (66045 66045) - (65056 65071) - (43232 43249) - (42736 42737) - (42654 42655) - (42612 42621) - (42608 42610) - (42607 42607) - (12441 12442) - (11744 11775) - (11503 11505) - (8421 8432) - (8418 8420) - (8417 8417) - (8413 8416) - (8400 8412) - (7675 7679) - (7616 7673) - (7019 7027) - (6847 6848) - (6846 6846) - (6832 6845) - (6783 6783) - (4957 4959) - (3328 3329) - (3076 3076) - (3072 3072) - (2027 2035) - (1160 1161) - (1155 1159) - (768 879)) - (ambiguous + '((ambiguous (1048576 1114109) (983040 1048573) (917760 917999) @@ -329,6 +277,336 @@ (36 36) (33 35) (32 32)) + (doublewidth + (201547 262141) + (196608 201546) + (195104 196605) + (195102 195103) + (194560 195101) + (191457 194559) + (183984 191456) + (183970 183983) + (178208 183969) + (178206 178207) + (177984 178205) + (177973 177983) + (173824 177972) + (173790 173823) + (131072 173789) + (129744 129750) + (129728 129730) + (129712 129718) + (129680 129704) + (129664 129670) + (129656 129658) + (129648 129652) + (129485 129535) + (129402 129483) + (129351 129400) + (129340 129349) + (129292 129338) + (128992 129003) + (128756 128764) + (128747 128748) + (128725 128727) + 
(128720 128722) + (128716 128716) + (128640 128709) + (128512 128591) + (128507 128511) + (128420 128420) + (128405 128406) + (128378 128378) + (128336 128359) + (128331 128334) + (128255 128317) + (128066 128252) + (128064 128064) + (128000 128062) + (127995 127999) + (127992 127994) + (127988 127988) + (127968 127984) + (127951 127955) + (127904 127946) + (127870 127891) + (127799 127868) + (127789 127797) + (127744 127776) + (127584 127589) + (127568 127569) + (127552 127560) + (127504 127547) + (127488 127490) + (127377 127386) + (127374 127374) + (127183 127183) + (126980 126980) + (110960 111355) + (110948 110951) + (110928 110930) + (110848 110878) + (110592 110847) + (101632 101640) + (101120 101589) + (100352 101119) + (94208 100343) + (94192 94193) + (94180 94180) + (94179 94179) + (94178 94178) + (94176 94177) + (65509 65510) + (65508 65508) + (65507 65507) + (65506 65506) + (65504 65505) + (65376 65376) + (65375 65375) + (65374 65374) + (65373 65373) + (65372 65372) + (65371 65371) + (65345 65370) + (65344 65344) + (65343 65343) + (65342 65342) + (65341 65341) + (65340 65340) + (65339 65339) + (65313 65338) + (65311 65312) + (65308 65310) + (65306 65307) + (65296 65305) + (65294 65295) + (65293 65293) + (65292 65292) + (65291 65291) + (65290 65290) + (65289 65289) + (65288 65288) + (65285 65287) + (65284 65284) + (65281 65283) + (65130 65131) + (65129 65129) + (65128 65128) + (65124 65126) + (65123 65123) + (65122 65122) + (65119 65121) + (65118 65118) + (65117 65117) + (65116 65116) + (65115 65115) + (65114 65114) + (65113 65113) + (65112 65112) + (65108 65111) + (65104 65106) + (65101 65103) + (65097 65100) + (65096 65096) + (65095 65095) + (65093 65094) + (65092 65092) + (65091 65091) + (65090 65090) + (65089 65089) + (65088 65088) + (65087 65087) + (65086 65086) + (65085 65085) + (65084 65084) + (65083 65083) + (65082 65082) + (65081 65081) + (65080 65080) + (65079 65079) + (65078 65078) + (65077 65077) + (65075 65076) + (65073 65074) + (65072 
65072) + (65049 65049) + (65048 65048) + (65047 65047) + (65040 65046) + (64218 64255) + (64112 64217) + (64110 64111) + (63744 64109) + (44032 55203) + (43360 43388) + (42128 42182) + (40982 42124) + (40981 40981) + (40960 40980) + (40957 40959) + (19968 40956) + (13312 19903) + (13056 13311) + (12992 13055) + (12977 12991) + (12938 12976) + (12928 12937) + (12896 12927) + (12881 12895) + (12880 12880) + (12842 12871) + (12832 12841) + (12800 12830) + (12784 12799) + (12736 12771) + (12704 12735) + (12694 12703) + (12690 12693) + (12688 12689) + (12593 12686) + (12549 12591) + (12543 12543) + (12540 12542) + (12539 12539) + (12449 12538) + (12448 12448) + (12447 12447) + (12445 12446) + (12443 12444) + (12353 12438) + (12350 12350) + (12349 12349) + (12348 12348) + (12347 12347) + (12344 12346) + (12342 12343) + (12337 12341) + (12336 12336) + (12334 12335) + (12330 12333) + (12321 12329) + (12320 12320) + (12318 12319) + (12317 12317) + (12316 12316) + (12315 12315) + (12314 12314) + (12313 12313) + (12312 12312) + (12311 12311) + (12310 12310) + (12309 12309) + (12308 12308) + (12306 12307) + (12305 12305) + (12304 12304) + (12303 12303) + (12302 12302) + (12301 12301) + (12300 12300) + (12299 12299) + (12298 12298) + (12297 12297) + (12296 12296) + (12295 12295) + (12294 12294) + (12293 12293) + (12292 12292) + (12289 12291) + (12288 12288) + (12272 12283) + (12032 12245) + (11931 12019) + (11904 11929) + (11093 11093) + (11088 11088) + (11035 11036) + (10175 10175) + (10160 10160) + (10133 10135) + (10071 10071) + (10067 10069) + (10062 10062) + (10060 10060) + (10024 10024) + (9994 9995) + (9989 9989) + (9981 9981) + (9978 9978) + (9973 9973) + (9970 9971) + (9962 9962) + (9940 9940) + (9934 9934) + (9924 9925) + (9917 9918) + (9898 9899) + (9889 9889) + (9875 9875) + (9855 9855) + (9800 9811) + (9748 9749) + (9725 9726) + (9203 9203) + (9200 9200) + (9193 9196) + (9002 9002) + (9001 9001) + (8986 8987) + (4352 4447)) + (combining + (125136 125142) + (122918 
122922) + (122915 122916) + (122907 122913) + (122888 122904) + (122880 122886) + (119362 119364) + (119210 119213) + (119173 119179) + (119163 119170) + (119149 119154) + (119143 119145) + (119141 119142) + (92912 92916) + (70512 70516) + (70502 70508) + (70459 70460) + (70400 70401) + (69446 69456) + (69291 69292) + (66422 66426) + (66045 66045) + (65056 65071) + (43232 43249) + (42736 42737) + (42654 42655) + (42612 42621) + (42608 42610) + (42607 42607) + (12441 12442) + (11744 11775) + (11503 11505) + (8421 8432) + (8418 8420) + (8417 8417) + (8413 8416) + (8400 8412) + (7675 7679) + (7616 7673) + (7019 7027) + (6847 6848) + (6846 6846) + (6832 6845) + (6783 6783) + (4957 4959) + (3328 3329) + (3076 3076) + (3072 3072) + (2027 2035) + (1160 1161) + (1155 1159) + (768 879)) (neutral (917536 917631) (917505 917505) @@ -2220,284 +2498,7 @@ (160 160) (128 159) (127 127) - (0 31)) - (doublewidth - (201547 262141) - (196608 201546) - (195104 196605) - (195102 195103) - (194560 195101) - (191457 194559) - (183984 191456) - (183970 183983) - (178208 183969) - (178206 178207) - (177984 178205) - (177973 177983) - (173824 177972) - (173790 173823) - (131072 173789) - (129744 129750) - (129728 129730) - (129712 129718) - (129680 129704) - (129664 129670) - (129656 129658) - (129648 129652) - (129485 129535) - (129402 129483) - (129351 129400) - (129340 129349) - (129292 129338) - (128992 129003) - (128756 128764) - (128747 128748) - (128725 128727) - (128720 128722) - (128716 128716) - (128640 128709) - (128512 128591) - (128507 128511) - (128420 128420) - (128405 128406) - (128378 128378) - (128336 128359) - (128331 128334) - (128255 128317) - (128066 128252) - (128064 128064) - (128000 128062) - (127995 127999) - (127992 127994) - (127988 127988) - (127968 127984) - (127951 127955) - (127904 127946) - (127870 127891) - (127799 127868) - (127789 127797) - (127744 127776) - (127584 127589) - (127568 127569) - (127552 127560) - (127504 127547) - (127488 127490) - (127377 
127386) - (127374 127374) - (127183 127183) - (126980 126980) - (110960 111355) - (110948 110951) - (110928 110930) - (110848 110878) - (110592 110847) - (101632 101640) - (101120 101589) - (100352 101119) - (94208 100343) - (94192 94193) - (94180 94180) - (94179 94179) - (94178 94178) - (94176 94177) - (65509 65510) - (65508 65508) - (65507 65507) - (65506 65506) - (65504 65505) - (65376 65376) - (65375 65375) - (65374 65374) - (65373 65373) - (65372 65372) - (65371 65371) - (65345 65370) - (65344 65344) - (65343 65343) - (65342 65342) - (65341 65341) - (65340 65340) - (65339 65339) - (65313 65338) - (65311 65312) - (65308 65310) - (65306 65307) - (65296 65305) - (65294 65295) - (65293 65293) - (65292 65292) - (65291 65291) - (65290 65290) - (65289 65289) - (65288 65288) - (65285 65287) - (65284 65284) - (65281 65283) - (65130 65131) - (65129 65129) - (65128 65128) - (65124 65126) - (65123 65123) - (65122 65122) - (65119 65121) - (65118 65118) - (65117 65117) - (65116 65116) - (65115 65115) - (65114 65114) - (65113 65113) - (65112 65112) - (65108 65111) - (65104 65106) - (65101 65103) - (65097 65100) - (65096 65096) - (65095 65095) - (65093 65094) - (65092 65092) - (65091 65091) - (65090 65090) - (65089 65089) - (65088 65088) - (65087 65087) - (65086 65086) - (65085 65085) - (65084 65084) - (65083 65083) - (65082 65082) - (65081 65081) - (65080 65080) - (65079 65079) - (65078 65078) - (65077 65077) - (65075 65076) - (65073 65074) - (65072 65072) - (65049 65049) - (65048 65048) - (65047 65047) - (65040 65046) - (64218 64255) - (64112 64217) - (64110 64111) - (63744 64109) - (44032 55203) - (43360 43388) - (42128 42182) - (40982 42124) - (40981 40981) - (40960 40980) - (40957 40959) - (19968 40956) - (13312 19903) - (13056 13311) - (12992 13055) - (12977 12991) - (12938 12976) - (12928 12937) - (12896 12927) - (12881 12895) - (12880 12880) - (12842 12871) - (12832 12841) - (12800 12830) - (12784 12799) - (12736 12771) - (12704 12735) - (12694 12703) - (12690 12693) 
- (12688 12689) - (12593 12686) - (12549 12591) - (12543 12543) - (12540 12542) - (12539 12539) - (12449 12538) - (12448 12448) - (12447 12447) - (12445 12446) - (12443 12444) - (12353 12438) - (12350 12350) - (12349 12349) - (12348 12348) - (12347 12347) - (12344 12346) - (12342 12343) - (12337 12341) - (12336 12336) - (12334 12335) - (12330 12333) - (12321 12329) - (12320 12320) - (12318 12319) - (12317 12317) - (12316 12316) - (12315 12315) - (12314 12314) - (12313 12313) - (12312 12312) - (12311 12311) - (12310 12310) - (12309 12309) - (12308 12308) - (12306 12307) - (12305 12305) - (12304 12304) - (12303 12303) - (12302 12302) - (12301 12301) - (12300 12300) - (12299 12299) - (12298 12298) - (12297 12297) - (12296 12296) - (12295 12295) - (12294 12294) - (12293 12293) - (12292 12292) - (12289 12291) - (12288 12288) - (12272 12283) - (12032 12245) - (11931 12019) - (11904 11929) - (11093 11093) - (11088 11088) - (11035 11036) - (10175 10175) - (10160 10160) - (10133 10135) - (10071 10071) - (10067 10069) - (10062 10062) - (10060 10060) - (10024 10024) - (9994 9995) - (9989 9989) - (9981 9981) - (9978 9978) - (9973 9973) - (9970 9971) - (9962 9962) - (9940 9940) - (9934 9934) - (9924 9925) - (9917 9918) - (9898 9899) - (9889 9889) - (9875 9875) - (9855 9855) - (9800 9811) - (9748 9749) - (9725 9726) - (9203 9203) - (9200 9200) - (9193 9196) - (9002 9002) - (9001 9001) - (8986 8987) - (4352 4447))))) + (0 31))))) (define char-set:eastasian-combining (char-set)) (define char-set:eastasian-doublewidth @@ -2507,6 +2508,15 @@ (define char-set:eastasian-neutral (char-set)) (define char-set:eastasian-ambiguous (char-set)) +(define eastasian-charsets + (list (list 'combining char-set:eastasian-combining) + (list 'doublewidth + char-set:eastasian-doublewidth) + (list 'halfwidth char-set:eastasian-halfwidth) + (list 'narrow char-set:eastasian-narrow) + (list 'neutral char-set:eastasian-neutral) + (list 'ambiguous char-set:eastasian-ambiguous))) + (ranges->charset! 
eastasian-ht 'combining diff --git a/runewidth/eastasian/locale.scm b/uniseg/eastasian/locale.scm similarity index 93% rename from runewidth/eastasian/locale.scm rename to uniseg/eastasian/locale.scm index abb5dcb..94df3ba 100644 --- a/runewidth/eastasian/locale.scm +++ b/uniseg/eastasian/locale.scm @@ -1,4 +1,4 @@ -(define-module (runewidth eastasian locale) +(define-module (uniseg eastasian locale) #:use-module (ice-9 regex) #:export (eastasian-locale?)) @@ -21,7 +21,7 @@ "gb2312")) ;; algorithm from: -;; https://github.com/mattn/go-runewidth/blob/master/runewidth_posix.go +;; https://github.com/mattn/go-runewidth/blob/master/runewidth_posix.go ;; For extracting the charset part of the locale string (some locales require this) ;; Note regex and capture group different as guile does not support 'non-capturing group' syntax diff --git a/runewidth/emoji.scm b/uniseg/emoji.scm similarity index 98% rename from runewidth/emoji.scm rename to uniseg/emoji.scm index 6a433f6..9b3f041 100644 --- a/runewidth/emoji.scm +++ b/uniseg/emoji.scm @@ -1,11 +1,11 @@ ;; Code generated by scripts/generate-emoji. DO NOT EDIT (define-module - (runewidth emoji) + (uniseg emoji) #:use-module (srfi srfi-1) #:export - (char-set:emoji)) + (char-set:extended-pictographic)) (define emoji-list '((130048 131069) (129751 129791) @@ -497,7 +497,8 @@ (8265 8265) (8252 8252))) -(define char-set:emoji (char-set)) +(define char-set:extended-pictographic + (char-set)) (for-each (λ (pair) @@ -505,5 +506,5 @@ (first pair) (+ 1 (second pair)) #t - char-set:emoji)) + char-set:extended-pictographic)) emoji-list) diff --git a/runewidth/graphemes.scm b/uniseg/graphemes.scm similarity index 94% rename from runewidth/graphemes.scm rename to uniseg/graphemes.scm index cffda53..9af0d48 100644 --- a/runewidth/graphemes.scm +++ b/uniseg/graphemes.scm @@ -1,13 +1,13 @@ ;; Code generated by scripts/generate-graphemes. 
DO NOT EDIT (define-module - (runewidth graphemes) + (uniseg graphemes) #:use-module (ice-9 hash-table) #:use-module (srfi srfi-1) #:use-module - (runewidth internal) + (uniseg internal) #:export (char-set:grapheme-hangul-syllable-l char-set:grapheme-hangul-syllable-v @@ -20,10 +20,434 @@ char-set:grapheme-extend char-set:grapheme-regional-indicator char-set:grapheme-spacing-mark - char-set:grapheme-zerowidth-joiner)) + char-set:grapheme-zerowidth-joiner + grapheme-charsets)) (define grapheme-ht (alist->hashq-table - '((regional-indicator (127462 127487)) + '((prepend + (73474 73474) + (73030 73030) + (72324 72329) + (72250 72250) + (72001 72001) + (71999 71999) + (70082 70083) + (69837 69837) + (69821 69821) + (3406 3406) + (2274 2274) + (2192 2193) + (1807 1807) + (1757 1757) + (1536 1541)) + (regional-indicator (127462 127487)) + (carriage-return (13 13)) + (extend + (917760 917999) + (917536 917631) + (127995 127999) + (125252 125258) + (125136 125142) + (124140 124143) + (123628 123631) + (123566 123566) + (123184 123190) + (123023 123023) + (122918 122922) + (122915 122916) + (122907 122913) + (122888 122904) + (122880 122886) + (121505 121519) + (121499 121503) + (121476 121476) + (121461 121461) + (121403 121452) + (121344 121398) + (119362 119364) + (119210 119213) + (119173 119179) + (119163 119170) + (119150 119154) + (119143 119145) + (119141 119141) + (118576 118598) + (118528 118573) + (113821 113822) + (94180 94180) + (94095 94098) + (94031 94031) + (92976 92982) + (92912 92916) + (78919 78933) + (78912 78912) + (73538 73538) + (73536 73536) + (73526 73530) + (73472 73473) + (73459 73460) + (73111 73111) + (73109 73109) + (73104 73105) + (73031 73031) + (73023 73029) + (73020 73021) + (73018 73018) + (73009 73014) + (72885 72886) + (72882 72883) + (72874 72880) + (72850 72871) + (72767 72767) + (72760 72765) + (72752 72758) + (72344 72345) + (72330 72342) + (72281 72283) + (72273 72278) + (72263 72263) + (72251 72254) + (72243 72248) + (72193 72202) 
+ (72160 72160) + (72154 72155) + (72148 72151) + (72003 72003) + (71998 71998) + (71995 71996) + (71984 71984) + (71737 71738) + (71727 71735) + (71463 71467) + (71458 71461) + (71453 71455) + (71351 71351) + (71344 71349) + (71341 71341) + (71339 71339) + (71231 71232) + (71229 71229) + (71219 71226) + (71132 71133) + (71103 71104) + (71100 71101) + (71090 71093) + (71087 71087) + (70850 70851) + (70847 70848) + (70845 70845) + (70842 70842) + (70835 70840) + (70832 70832) + (70750 70750) + (70726 70726) + (70722 70724) + (70712 70719) + (70512 70516) + (70502 70508) + (70487 70487) + (70464 70464) + (70462 70462) + (70459 70460) + (70400 70401) + (70371 70378) + (70367 70367) + (70209 70209) + (70206 70206) + (70198 70199) + (70196 70196) + (70191 70193) + (70095 70095) + (70089 70092) + (70070 70078) + (70016 70017) + (70003 70003) + (69933 69940) + (69927 69931) + (69888 69890) + (69826 69826) + (69817 69818) + (69811 69814) + (69759 69761) + (69747 69748) + (69744 69744) + (69688 69702) + (69633 69633) + (69506 69509) + (69446 69456) + (69373 69375) + (69291 69292) + (68900 68903) + (68325 68326) + (68159 68159) + (68152 68154) + (68108 68111) + (68101 68102) + (68097 68099) + (66422 66426) + (66272 66272) + (66045 66045) + (65438 65439) + (65056 65071) + (65024 65039) + (64286 64286) + (44013 44013) + (44008 44008) + (44005 44005) + (43766 43766) + (43756 43757) + (43713 43713) + (43710 43711) + (43703 43704) + (43698 43700) + (43696 43696) + (43644 43644) + (43596 43596) + (43587 43587) + (43573 43574) + (43569 43570) + (43561 43566) + (43493 43493) + (43452 43453) + (43446 43449) + (43443 43443) + (43392 43394) + (43335 43345) + (43302 43309) + (43263 43263) + (43232 43249) + (43204 43205) + (43052 43052) + (43045 43046) + (43019 43019) + (43014 43014) + (43010 43010) + (42736 42737) + (42654 42655) + (42612 42621) + (42608 42610) + (42607 42607) + (12441 12442) + (12334 12335) + (12330 12333) + (11744 11775) + (11647 11647) + (11503 11505) + (8421 8432) + 
(8418 8420) + (8417 8417) + (8413 8416) + (8400 8412) + (8204 8204) + (7616 7679) + (7416 7417) + (7412 7412) + (7405 7405) + (7394 7400) + (7380 7392) + (7376 7378) + (7222 7223) + (7212 7219) + (7151 7153) + (7149 7149) + (7144 7145) + (7142 7142) + (7083 7085) + (7080 7081) + (7074 7077) + (7040 7041) + (7019 7027) + (6978 6978) + (6972 6972) + (6966 6970) + (6965 6965) + (6964 6964) + (6912 6915) + (6847 6862) + (6846 6846) + (6832 6845) + (6783 6783) + (6771 6780) + (6757 6764) + (6754 6754) + (6752 6752) + (6744 6750) + (6742 6742) + (6683 6683) + (6679 6680) + (6457 6459) + (6450 6450) + (6439 6440) + (6432 6434) + (6313 6313) + (6277 6278) + (6159 6159) + (6155 6157) + (6109 6109) + (6089 6099) + (6086 6086) + (6071 6077) + (6068 6069) + (6002 6003) + (5970 5971) + (5938 5939) + (5906 5908) + (4957 4959) + (4253 4253) + (4237 4237) + (4229 4230) + (4226 4226) + (4209 4212) + (4190 4192) + (4184 4185) + (4157 4158) + (4153 4154) + (4146 4151) + (4141 4144) + (4038 4038) + (3993 4028) + (3981 3991) + (3974 3975) + (3968 3972) + (3953 3966) + (3897 3897) + (3895 3895) + (3893 3893) + (3864 3865) + (3784 3790) + (3764 3772) + (3761 3761) + (3655 3662) + (3636 3642) + (3633 3633) + (3551 3551) + (3542 3542) + (3538 3540) + (3535 3535) + (3530 3530) + (3457 3457) + (3426 3427) + (3415 3415) + (3405 3405) + (3393 3396) + (3390 3390) + (3387 3388) + (3328 3329) + (3298 3299) + (3285 3286) + (3276 3277) + (3270 3270) + (3266 3266) + (3263 3263) + (3260 3260) + (3201 3201) + (3170 3171) + (3157 3158) + (3146 3149) + (3142 3144) + (3134 3136) + (3132 3132) + (3076 3076) + (3072 3072) + (3031 3031) + (3021 3021) + (3008 3008) + (3006 3006) + (2946 2946) + (2914 2915) + (2903 2903) + (2901 2902) + (2893 2893) + (2881 2884) + (2879 2879) + (2878 2878) + (2876 2876) + (2817 2817) + (2810 2815) + (2786 2787) + (2765 2765) + (2759 2760) + (2753 2757) + (2748 2748) + (2689 2690) + (2677 2677) + (2672 2673) + (2641 2641) + (2635 2637) + (2631 2632) + (2625 2626) + (2620 2620) 
+ (2561 2562) + (2558 2558) + (2530 2531) + (2519 2519) + (2509 2509) + (2497 2500) + (2494 2494) + (2492 2492) + (2433 2433) + (2402 2403) + (2385 2391) + (2381 2381) + (2369 2376) + (2364 2364) + (2362 2362) + (2275 2306) + (2250 2273) + (2200 2207) + (2137 2139) + (2089 2093) + (2085 2087) + (2075 2083) + (2070 2073) + (2045 2045) + (2027 2035) + (1958 1968) + (1840 1866) + (1809 1809) + (1770 1773) + (1767 1768) + (1759 1764) + (1750 1756) + (1648 1648) + (1611 1631) + (1552 1562) + (1479 1479) + (1476 1477) + (1473 1474) + (1471 1471) + (1425 1469) + (1160 1161) + (1155 1159) + (768 879)) + (hangul-syllable-t (55243 55291) (4520 4607)) + (control + (918000 921599) + (917632 917759) + (917506 917535) + (917505 917505) + (917504 917504) + (119155 119162) + (113824 113827) + (78896 78911) + (65529 65531) + (65520 65528) + (65279 65279) + (8294 8303) + (8293 8293) + (8288 8292) + (8234 8238) + (8233 8233) + (8232 8232) + (8206 8207) + (8203 8203) + (6158 6158) + (1564 1564) + (173 173) + (127 159) + (14 31) + (11 12) + (0 9)) (hangul-syllable-lv (55176 55176) (55148 55148) @@ -424,6 +848,174 @@ (44088 44088) (44060 44060) (44032 44032)) + (line-feed (10 10)) + (hangul-syllable-v (55216 55238) (4448 4519)) + (spacing-mark + (119149 119149) + (119142 119142) + (94192 94193) + (94033 94087) + (73537 73537) + (73534 73535) + (73524 73525) + (73475 73475) + (73461 73462) + (73110 73110) + (73107 73108) + (73098 73102) + (72884 72884) + (72881 72881) + (72873 72873) + (72766 72766) + (72751 72751) + (72343 72343) + (72279 72280) + (72249 72249) + (72164 72164) + (72156 72159) + (72145 72147) + (72002 72002) + (72000 72000) + (71997 71997) + (71991 71992) + (71985 71989) + (71736 71736) + (71724 71726) + (71462 71462) + (71350 71350) + (71342 71343) + (71340 71340) + (71230 71230) + (71227 71228) + (71216 71218) + (71102 71102) + (71096 71099) + (71088 71089) + (70849 70849) + (70846 70846) + (70843 70844) + (70841 70841) + (70833 70834) + (70725 70725) + (70720 70721) + 
(70709 70711) + (70498 70499) + (70475 70477) + (70471 70472) + (70465 70468) + (70463 70463) + (70402 70403) + (70368 70370) + (70197 70197) + (70194 70195) + (70188 70190) + (70094 70094) + (70079 70080) + (70067 70069) + (70018 70018) + (69957 69958) + (69932 69932) + (69815 69816) + (69808 69810) + (69762 69762) + (69634 69634) + (69632 69632) + (44012 44012) + (44009 44010) + (44006 44007) + (44003 44004) + (43765 43765) + (43758 43759) + (43755 43755) + (43597 43597) + (43571 43572) + (43567 43568) + (43454 43456) + (43450 43451) + (43444 43445) + (43395 43395) + (43346 43347) + (43188 43203) + (43136 43137) + (43047 43047) + (43043 43044) + (7415 7415) + (7393 7393) + (7220 7221) + (7204 7211) + (7154 7155) + (7150 7150) + (7146 7148) + (7143 7143) + (7082 7082) + (7078 7079) + (7073 7073) + (7042 7042) + (6979 6980) + (6973 6977) + (6971 6971) + (6916 6916) + (6765 6770) + (6743 6743) + (6741 6741) + (6681 6682) + (6451 6456) + (6448 6449) + (6441 6443) + (6435 6438) + (6087 6088) + (6078 6085) + (6070 6070) + (5940 5940) + (5909 5909) + (4228 4228) + (4182 4183) + (4155 4156) + (4145 4145) + (3967 3967) + (3902 3903) + (3763 3763) + (3635 3635) + (3570 3571) + (3544 3550) + (3536 3537) + (3458 3459) + (3402 3404) + (3398 3400) + (3391 3392) + (3330 3331) + (3315 3315) + (3274 3275) + (3271 3272) + (3267 3268) + (3264 3265) + (3262 3262) + (3202 3203) + (3137 3140) + (3073 3075) + (3018 3020) + (3014 3016) + (3009 3010) + (3007 3007) + (2891 2892) + (2887 2888) + (2880 2880) + (2818 2819) + (2763 2764) + (2761 2761) + (2750 2752) + (2691 2691) + (2622 2624) + (2563 2563) + (2507 2508) + (2503 2504) + (2495 2496) + (2434 2435) + (2382 2383) + (2377 2380) + (2366 2368) + (2363 2363) + (2307 2307)) (hangul-syllable-lvt (55177 55203) (55149 55175) @@ -824,599 +1416,51 @@ (44089 44115) (44061 44087) (44033 44059)) - (prepend - (73474 73474) - (73030 73030) - (72324 72329) - (72250 72250) - (72001 72001) - (71999 71999) - (70082 70083) - (69837 69837) - (69821 
69821) - (3406 3406) - (2274 2274) - (2192 2193) - (1807 1807) - (1757 1757) - (1536 1541)) (hangul-syllable-l (43360 43388) (4352 4447)) - (hangul-syllable-v (55216 55238) (4448 4519)) - (spacing-mark - (119149 119149) - (119142 119142) - (94192 94193) - (94033 94087) - (73537 73537) - (73534 73535) - (73524 73525) - (73475 73475) - (73461 73462) - (73110 73110) - (73107 73108) - (73098 73102) - (72884 72884) - (72881 72881) - (72873 72873) - (72766 72766) - (72751 72751) - (72343 72343) - (72279 72280) - (72249 72249) - (72164 72164) - (72156 72159) - (72145 72147) - (72002 72002) - (72000 72000) - (71997 71997) - (71991 71992) - (71985 71989) - (71736 71736) - (71724 71726) - (71462 71462) - (71350 71350) - (71342 71343) - (71340 71340) - (71230 71230) - (71227 71228) - (71216 71218) - (71102 71102) - (71096 71099) - (71088 71089) - (70849 70849) - (70846 70846) - (70843 70844) - (70841 70841) - (70833 70834) - (70725 70725) - (70720 70721) - (70709 70711) - (70498 70499) - (70475 70477) - (70471 70472) - (70465 70468) - (70463 70463) - (70402 70403) - (70368 70370) - (70197 70197) - (70194 70195) - (70188 70190) - (70094 70094) - (70079 70080) - (70067 70069) - (70018 70018) - (69957 69958) - (69932 69932) - (69815 69816) - (69808 69810) - (69762 69762) - (69634 69634) - (69632 69632) - (44012 44012) - (44009 44010) - (44006 44007) - (44003 44004) - (43765 43765) - (43758 43759) - (43755 43755) - (43597 43597) - (43571 43572) - (43567 43568) - (43454 43456) - (43450 43451) - (43444 43445) - (43395 43395) - (43346 43347) - (43188 43203) - (43136 43137) - (43047 43047) - (43043 43044) - (7415 7415) - (7393 7393) - (7220 7221) - (7204 7211) - (7154 7155) - (7150 7150) - (7146 7148) - (7143 7143) - (7082 7082) - (7078 7079) - (7073 7073) - (7042 7042) - (6979 6980) - (6973 6977) - (6971 6971) - (6916 6916) - (6765 6770) - (6743 6743) - (6741 6741) - (6681 6682) - (6451 6456) - (6448 6449) - (6441 6443) - (6435 6438) - (6087 6088) - (6078 6085) - (6070 6070) - (5940 
5940) - (5909 5909) - (4228 4228) - (4182 4183) - (4155 4156) - (4145 4145) - (3967 3967) - (3902 3903) - (3763 3763) - (3635 3635) - (3570 3571) - (3544 3550) - (3536 3537) - (3458 3459) - (3402 3404) - (3398 3400) - (3391 3392) - (3330 3331) - (3315 3315) - (3274 3275) - (3271 3272) - (3267 3268) - (3264 3265) - (3262 3262) - (3202 3203) - (3137 3140) - (3073 3075) - (3018 3020) - (3014 3016) - (3009 3010) - (3007 3007) - (2891 2892) - (2887 2888) - (2880 2880) - (2818 2819) - (2763 2764) - (2761 2761) - (2750 2752) - (2691 2691) - (2622 2624) - (2563 2563) - (2507 2508) - (2503 2504) - (2495 2496) - (2434 2435) - (2382 2383) - (2377 2380) - (2366 2368) - (2363 2363) - (2307 2307)) - (carriage-return (13 13)) - (extend - (917760 917999) - (917536 917631) - (127995 127999) - (125252 125258) - (125136 125142) - (124140 124143) - (123628 123631) - (123566 123566) - (123184 123190) - (123023 123023) - (122918 122922) - (122915 122916) - (122907 122913) - (122888 122904) - (122880 122886) - (121505 121519) - (121499 121503) - (121476 121476) - (121461 121461) - (121403 121452) - (121344 121398) - (119362 119364) - (119210 119213) - (119173 119179) - (119163 119170) - (119150 119154) - (119143 119145) - (119141 119141) - (118576 118598) - (118528 118573) - (113821 113822) - (94180 94180) - (94095 94098) - (94031 94031) - (92976 92982) - (92912 92916) - (78919 78933) - (78912 78912) - (73538 73538) - (73536 73536) - (73526 73530) - (73472 73473) - (73459 73460) - (73111 73111) - (73109 73109) - (73104 73105) - (73031 73031) - (73023 73029) - (73020 73021) - (73018 73018) - (73009 73014) - (72885 72886) - (72882 72883) - (72874 72880) - (72850 72871) - (72767 72767) - (72760 72765) - (72752 72758) - (72344 72345) - (72330 72342) - (72281 72283) - (72273 72278) - (72263 72263) - (72251 72254) - (72243 72248) - (72193 72202) - (72160 72160) - (72154 72155) - (72148 72151) - (72003 72003) - (71998 71998) - (71995 71996) - (71984 71984) - (71737 71738) - (71727 71735) - 
(71463 71467) - (71458 71461) - (71453 71455) - (71351 71351) - (71344 71349) - (71341 71341) - (71339 71339) - (71231 71232) - (71229 71229) - (71219 71226) - (71132 71133) - (71103 71104) - (71100 71101) - (71090 71093) - (71087 71087) - (70850 70851) - (70847 70848) - (70845 70845) - (70842 70842) - (70835 70840) - (70832 70832) - (70750 70750) - (70726 70726) - (70722 70724) - (70712 70719) - (70512 70516) - (70502 70508) - (70487 70487) - (70464 70464) - (70462 70462) - (70459 70460) - (70400 70401) - (70371 70378) - (70367 70367) - (70209 70209) - (70206 70206) - (70198 70199) - (70196 70196) - (70191 70193) - (70095 70095) - (70089 70092) - (70070 70078) - (70016 70017) - (70003 70003) - (69933 69940) - (69927 69931) - (69888 69890) - (69826 69826) - (69817 69818) - (69811 69814) - (69759 69761) - (69747 69748) - (69744 69744) - (69688 69702) - (69633 69633) - (69506 69509) - (69446 69456) - (69373 69375) - (69291 69292) - (68900 68903) - (68325 68326) - (68159 68159) - (68152 68154) - (68108 68111) - (68101 68102) - (68097 68099) - (66422 66426) - (66272 66272) - (66045 66045) - (65438 65439) - (65056 65071) - (65024 65039) - (64286 64286) - (44013 44013) - (44008 44008) - (44005 44005) - (43766 43766) - (43756 43757) - (43713 43713) - (43710 43711) - (43703 43704) - (43698 43700) - (43696 43696) - (43644 43644) - (43596 43596) - (43587 43587) - (43573 43574) - (43569 43570) - (43561 43566) - (43493 43493) - (43452 43453) - (43446 43449) - (43443 43443) - (43392 43394) - (43335 43345) - (43302 43309) - (43263 43263) - (43232 43249) - (43204 43205) - (43052 43052) - (43045 43046) - (43019 43019) - (43014 43014) - (43010 43010) - (42736 42737) - (42654 42655) - (42612 42621) - (42608 42610) - (42607 42607) - (12441 12442) - (12334 12335) - (12330 12333) - (11744 11775) - (11647 11647) - (11503 11505) - (8421 8432) - (8418 8420) - (8417 8417) - (8413 8416) - (8400 8412) - (8204 8204) - (7616 7679) - (7416 7417) - (7412 7412) - (7405 7405) - (7394 7400) - (7380 
7392) - (7376 7378) - (7222 7223) - (7212 7219) - (7151 7153) - (7149 7149) - (7144 7145) - (7142 7142) - (7083 7085) - (7080 7081) - (7074 7077) - (7040 7041) - (7019 7027) - (6978 6978) - (6972 6972) - (6966 6970) - (6965 6965) - (6964 6964) - (6912 6915) - (6847 6862) - (6846 6846) - (6832 6845) - (6783 6783) - (6771 6780) - (6757 6764) - (6754 6754) - (6752 6752) - (6744 6750) - (6742 6742) - (6683 6683) - (6679 6680) - (6457 6459) - (6450 6450) - (6439 6440) - (6432 6434) - (6313 6313) - (6277 6278) - (6159 6159) - (6155 6157) - (6109 6109) - (6089 6099) - (6086 6086) - (6071 6077) - (6068 6069) - (6002 6003) - (5970 5971) - (5938 5939) - (5906 5908) - (4957 4959) - (4253 4253) - (4237 4237) - (4229 4230) - (4226 4226) - (4209 4212) - (4190 4192) - (4184 4185) - (4157 4158) - (4153 4154) - (4146 4151) - (4141 4144) - (4038 4038) - (3993 4028) - (3981 3991) - (3974 3975) - (3968 3972) - (3953 3966) - (3897 3897) - (3895 3895) - (3893 3893) - (3864 3865) - (3784 3790) - (3764 3772) - (3761 3761) - (3655 3662) - (3636 3642) - (3633 3633) - (3551 3551) - (3542 3542) - (3538 3540) - (3535 3535) - (3530 3530) - (3457 3457) - (3426 3427) - (3415 3415) - (3405 3405) - (3393 3396) - (3390 3390) - (3387 3388) - (3328 3329) - (3298 3299) - (3285 3286) - (3276 3277) - (3270 3270) - (3266 3266) - (3263 3263) - (3260 3260) - (3201 3201) - (3170 3171) - (3157 3158) - (3146 3149) - (3142 3144) - (3134 3136) - (3132 3132) - (3076 3076) - (3072 3072) - (3031 3031) - (3021 3021) - (3008 3008) - (3006 3006) - (2946 2946) - (2914 2915) - (2903 2903) - (2901 2902) - (2893 2893) - (2881 2884) - (2879 2879) - (2878 2878) - (2876 2876) - (2817 2817) - (2810 2815) - (2786 2787) - (2765 2765) - (2759 2760) - (2753 2757) - (2748 2748) - (2689 2690) - (2677 2677) - (2672 2673) - (2641 2641) - (2635 2637) - (2631 2632) - (2625 2626) - (2620 2620) - (2561 2562) - (2558 2558) - (2530 2531) - (2519 2519) - (2509 2509) - (2497 2500) - (2494 2494) - (2492 2492) - (2433 2433) - (2402 2403) - 
(2385 2391) - (2381 2381) - (2369 2376) - (2364 2364) - (2362 2362) - (2275 2306) - (2250 2273) - (2200 2207) - (2137 2139) - (2089 2093) - (2085 2087) - (2075 2083) - (2070 2073) - (2045 2045) - (2027 2035) - (1958 1968) - (1840 1866) - (1809 1809) - (1770 1773) - (1767 1768) - (1759 1764) - (1750 1756) - (1648 1648) - (1611 1631) - (1552 1562) - (1479 1479) - (1476 1477) - (1473 1474) - (1471 1471) - (1425 1469) - (1160 1161) - (1155 1159) - (768 879)) - (line-feed (10 10)) - (zerowidth-joiner (8205 8205)) - (hangul-syllable-t (55243 55291) (4520 4607)) - (control - (918000 921599) - (917632 917759) - (917506 917535) - (917505 917505) - (917504 917504) - (119155 119162) - (113824 113827) - (78896 78911) - (65529 65531) - (65520 65528) - (65279 65279) - (8294 8303) - (8293 8293) - (8288 8292) - (8234 8238) - (8233 8233) - (8232 8232) - (8206 8207) - (8203 8203) - (6158 6158) - (1564 1564) - (173 173) - (127 159) - (14 31) - (11 12) - (0 9))))) + (zerowidth-joiner (8205 8205))))) + +(define char-set:grapheme-hangul-syllable-l + (char-set)) +(define char-set:grapheme-hangul-syllable-v + (char-set)) +(define char-set:grapheme-hangul-syllable-lv + (char-set)) +(define char-set:grapheme-hangul-syllable-lvt + (char-set)) +(define char-set:grapheme-prepend (char-set)) +(define char-set:grapheme-carriage-return + (char-set)) +(define char-set:grapheme-line-feed (char-set)) +(define char-set:grapheme-control (char-set)) +(define char-set:grapheme-extend (char-set)) +(define char-set:grapheme-regional-indicator + (char-set)) +(define char-set:grapheme-spacing-mark + (char-set)) +(define char-set:grapheme-zerowidth-joiner + (char-set)) + +(define grapheme-charsets + (list (list 'hangul-syllable-l + char-set:grapheme-hangul-syllable-l) + (list 'hangul-syllable-v + char-set:grapheme-hangul-syllable-v) + (list 'hangul-syllable-lv + char-set:grapheme-hangul-syllable-lv) + (list 'hangul-syllable-lvt + char-set:grapheme-hangul-syllable-lvt) + (list 'prepend 
char-set:grapheme-prepend) + (list 'carriage-return + char-set:grapheme-carriage-return) + (list 'line-feed char-set:grapheme-line-feed) + (list 'control char-set:grapheme-control) + (list 'extend char-set:grapheme-extend) + (list 'regional-indicator + char-set:grapheme-regional-indicator) + (list 'spacing-mark + char-set:grapheme-spacing-mark) + (list 'zerowidth-joiner + char-set:grapheme-zerowidth-joiner))) (ranges->charset! grapheme-ht diff --git a/uniseg/graphemes/stream.scm b/uniseg/graphemes/stream.scm new file mode 100644 index 0000000..511d109 --- /dev/null +++ b/uniseg/graphemes/stream.scm @@ -0,0 +1,22 @@ +(define-module (uniseg graphemes stream) #| module name mirrors the file path uniseg/graphemes/stream.scm |# + #:use-module (srfi srfi-41) + #:use-module (srfi srfi-9 gnu) + #:export (make-grapheme + + input->grapheme-stream)) + +(define-immutable-record-type <grapheme> #| immutable record with fields glyphs, width, sentence-end? and word-end? |# + (make-grapheme glyphs width sentence-end? word-end?) + grapheme? + (glyphs grapheme-glyphs) + (width grapheme-width) + (sentence-end? grapheme-sentence-end?) + (word-end? grapheme-word-end?)) + +(define-stream (input->grapheme-stream port) #| falls back to (current-input-port) when port is #f. NOTE(review): unfinished - glyphs, width, sentence-end?, word-end? and c are not bound anywhere in this file |# + (unless port + (set! port (current-input-port))) + + (define gr (make-grapheme glyphs width sentence-end? 
word-end?)) + + (stream-cons c (input->grapheme-stream port))) diff --git a/runewidth/hconfig.scm b/uniseg/hconfig.scm similarity index 87% rename from runewidth/hconfig.scm rename to uniseg/hconfig.scm index da1f4b9..45aaa40 100644 --- a/runewidth/hconfig.scm +++ b/uniseg/hconfig.scm @@ -1,5 +1,5 @@ (define-module - (runewidth hconfig) + (uniseg hconfig) #:use-module (srfi srfi-26) #:export @@ -21,7 +21,7 @@ (define %copyright '(2024)) -(define %gettext-domain "guile-runewidth") +(define %gettext-domain "guile-uniseg") (define G_ identity) diff --git a/runewidth/internal.scm b/uniseg/internal.scm similarity index 98% rename from runewidth/internal.scm rename to uniseg/internal.scm index fffbd6f..7da6e1d 100644 --- a/runewidth/internal.scm +++ b/uniseg/internal.scm @@ -1,4 +1,4 @@ -(define-module (runewidth internal) +(define-module (uniseg internal) #:use-module (ice-9 peg) #:use-module (ice-9 textual-ports) #:use-module (ice-9 exceptions)