Renaming to uniseg

This commit is contained in:
Vivianne 2024-03-03 13:13:22 -05:00
parent 4a93c70b79
commit f1c03097f1
18 changed files with 1143 additions and 971 deletions

View file

@ -1,10 +1,10 @@
# -*- mode: org; coding: utf-8; -*-
#+TITLE: Hacking guile-runewidth
#+TITLE: Hacking guile-uniseg
* Contributing
By far the easiest way to hack on guile-runewidth is to develop using Guix:
By far the easiest way to hack on guile-uniseg is to develop using Guix:
#+BEGIN_SRC bash
# Obtain the source code

View file

@ -1,14 +1,14 @@
# -*- mode: org; coding: utf-8; -*-
#+TITLE: README for Guile Runewidth
#+TITLE: README for Guile Uniseg
~guile-runewidth~ is a simple Guile library that provides several new Guile character sets for categorizing
~guile-uniseg~ is a simple Guile library that provides several new Guile character sets for categorizing
East Asian characters and emoji. This can be useful when determining the width of characters.
This library is heavily inspired by the golang library ~runewidth~.
This library is heavily inspired by the golang libraries ~uniseg~ and ~runewidth~.
* Scripts
There are two scripts which can be used to regenerate the character set files. These are optional to run, since
the script's output is stored in the repository at ~runewidth/eastasian.scm~ and ~runewidth/emoji.scm~.
the script's output is stored in the repository at ~uniseg/eastasian.scm~ and ~uniseg/emoji.scm~.
However, should the source change as the unicode spec changes, these can be refreshed.

View file

@ -27,7 +27,7 @@ Documentation License''.
@titlepage
@title The Reflow Manual
@author
@author Vivianne Langdon
@page
@vskip 0pt plus 1filll

View file

@ -13,7 +13,7 @@
(srfi srfi-1))
(package
(name "guile-runewidth")
(name "guile-uniseg")
(version "0.1")
(source
(local-file
@ -92,6 +92,6 @@
(description
"A library that provides guile character sets and operations to work with runes that are more than a single character width.")
(home-page
"https://git.solarpunk.moe/vv/guile-runewidth")
"https://git.solarpunk.moe/vv/guile-uniseg")
(license license:gpl3+))

View file

@ -1,5 +1,5 @@
(hall-description
(name "runewidth")
(name "uniseg")
(prefix "guile")
(version "0.1")
(author "Vivianne Langdon")
@ -9,7 +9,7 @@
(description
"A library that provides guile character sets and operations to work with runes that are more than a single character width.")
(home-page
"https://git.solarpunk.moe/vv/guile-runewidth")
"https://git.solarpunk.moe/vv/guile-uniseg")
(license gpl3+)
(dependencies `())
(skip ())
@ -19,12 +19,13 @@
(native-language-support #f)
(licensing #f)))
(files (libraries
((scheme-file "runewidth")
((scheme-file "uniseg")
(directory
"runewidth"
"uniseg"
((scheme-file "emoji")
(directory "eastasian" ((scheme-file "locale")))
(scheme-file "eastasian")
(directory "graphemes" ((scheme-file "stream")))
(scheme-file "graphemes")
(scheme-file "internal")))))
(tests ((directory
@ -41,7 +42,7 @@
(symlink "README" "README.org")
(text-file "HACKING")
(text-file "COPYING")
(directory "doc" ((texi-file "guile-runewidth")))))
(directory "doc" ((texi-file "guile-uniseg")))))
(infrastructure
((scheme-file "guix")
(text-file ".gitignore")

View file

View file

@ -5,7 +5,7 @@
;; Can be called with a trailing argument pointing to the file on disk.
(use-modules
(runewidth internal)
(uniseg internal)
(ice-9 pretty-print)
(ice-9 peg)
(ice-9 format)
@ -97,7 +97,7 @@
(λ ()
(wget-to-lines east-asian-url stdout))))
(define file "runewidth/eastasian.scm")
(define file "uniseg/eastasian.scm")
(format stdout "Writing to ~a...\n" file)
@ -108,12 +108,13 @@
(for-each process-east-asian-line (line-func))
(pretty-print
`(define-module (runewidth eastasian)
`(define-module (uniseg eastasian)
#:use-module (ice-9 hash-table)
#:use-module (srfi srfi-1)
#:use-module (runewidth internal)
#:use-module (uniseg internal)
#:export
,ea-symbol-names))
(,@ea-symbol-names
eastasian-charsets)))
(pretty-print
`(define eastasian-ht
@ -129,6 +130,18 @@
(display "\n")
(pretty-print
`(define eastasian-charsets
(list
,@(map
(λ (pair)
(let ((f (first pair))
(s (second pair)))
`(list ',f ,s)))
ea-sets-and-symbols))))
(display "\n")
(for-each
(λ (set-pair)
(let ((name (first set-pair))

View file

@ -5,7 +5,7 @@
;; Can be called with a trailing argument pointing to the file on disk.
(use-modules
(runewidth internal)
(uniseg internal)
(ice-9 pretty-print)
(ice-9 peg)
(ice-9 format)
@ -66,7 +66,7 @@
(λ ()
(wget-to-lines emoji-url stdout))))
(define file "runewidth/emoji.scm")
(define file "uniseg/emoji.scm")
(format stdout "Writing to ~a...\n" file)
@ -77,9 +77,9 @@
(for-each process-emoji-line (line-func))
(pretty-print
`(define-module (runewidth emoji)
`(define-module (uniseg emoji)
#:use-module (srfi srfi-1)
#:export (char-set:emoji)))
#:export (char-set:extended-pictographic)))
(pretty-print
`(define emoji-list ',emoji-list))
@ -87,7 +87,7 @@
(display "\n")
(pretty-print
`(define char-set:emoji (char-set)))
`(define char-set:extended-pictographic (char-set)))
(display "\n")
@ -97,7 +97,7 @@
(ucs-range->char-set!
(first pair)
(+ 1 (second pair))
#t char-set:emoji))
#t char-set:extended-pictographic))
emoji-list))
(display "Code generation complete.\n" stdout)))

View file

@ -5,7 +5,7 @@
;; Can be called with a trailing argument pointing to the file on disk.
(use-modules
(runewidth internal)
(uniseg internal)
(ice-9 pretty-print)
(ice-9 peg)
(ice-9 format)
@ -108,7 +108,7 @@
(λ ()
(wget-to-lines grapheme-url stdout))))
(define file "runewidth/graphemes.scm")
(define file "uniseg/graphemes.scm")
(format stdout "Writing to ~a...\n" file)
@ -119,11 +119,12 @@
(for-each process-grapheme-line (line-func))
(pretty-print
`(define-module (runewidth graphemes)
`(define-module (uniseg graphemes)
#:use-module (ice-9 hash-table)
#:use-module (srfi srfi-1)
#:use-module (runewidth internal)
#:export ,grapheme-symbol-names))
#:use-module (uniseg internal)
#:export ( ,@grapheme-symbol-names
grapheme-charsets)))
(pretty-print
@ -132,6 +133,26 @@
(display "\n")
(for-each
(λ (sym)
(pretty-print
`(define ,sym (char-set))))
grapheme-symbol-names)
(display "\n")
(pretty-print
`(define grapheme-charsets
(list
,@(map
(λ (pair)
(let ((f (first pair))
(s (second pair)))
`(list ',f ,s)))
grapheme-sets-and-symbols))))
(display "\n")
(for-each
(λ (set-pair)
(let ((name (first set-pair))

View file

@ -1,5 +1,5 @@
(define-module (tests test-eastasian-locale)
#:use-module (runewidth eastasian locale)
#:use-module (uniseg eastasian locale)
#:use-module (srfi srfi-64))
(test-begin "test-eastasian-locale")

60
uniseg.scm Normal file
View file

@ -0,0 +1,60 @@
(define-module (uniseg)
#:use-module (srfi srfi-1)
#:use-module (ice-9 match)
#:use-module (uniseg emoji)
#:use-module (uniseg graphemes)
#:use-module (uniseg eastasian)
#:export (emoji?
grapheme-property
eastasian-property))
(define (emoji? char)
  "Return #t when CHAR is a member of char-set:extended-pictographic,
#f otherwise."
  (char-set-contains? char-set:extended-pictographic char))
(define (get-prop sets char)
  "Return the tag of the first entry in SETS whose char-set contains CHAR,
or #f when no entry matches.  Each entry of SETS is a two-element list
of the form (TAG CHAR-SET)."
  ;; Walk the entries in order so that the earliest matching entry wins.
  (let scan ((remaining sets))
    (cond
     ((null? remaining) #f)
     ((char-set-contains? (second (car remaining)) char)
      (first (car remaining)))
     (else (scan (cdr remaining))))))
(define (grapheme-property char)
  "Return the grapheme cluster property symbol for CHAR, looked up in
grapheme-charsets, or the symbol `other' when CHAR is in none of those
sets.  See https://www.unicode.org/reports/tr44/"
  ;; NOTE(review): the grapheme cluster break values appear to be specified
  ;; in UAX #29 rather than UAX #44 -- confirm which link is intended.
  (let ((found (get-prop grapheme-charsets char)))
    (if found
        found
        'other)))
(define (eastasian-property char)
  "Return the East Asian width property symbol for CHAR.

The result is the tag of the first matching entry in eastasian-charsets.
When CHAR is in none of those sets, the result is `doublewidth' if its
code point lies in one of the fallback ranges below, and `neutral'
otherwise."
  (define (between? code lo hi)
    ;; Inclusive range test on integer code points.
    (<= lo code hi))

  ;; The spec defines any values not in the table to be wide if they are
  ;; in certain ranges.  These characters are not in the char-sets as they
  ;; are undesignated.  See the comments in Unicode's EastAsianWidth.txt.
  (define (fallback-wide? code)
    (or (between? code #x3400 #x4DBF)      ; unified ideographs, extension A
        (between? code #x4E00 #x9FFF)      ; unified ideographs
        (between? code #xF900 #xFAFF)      ; compatibility ideographs
        (between? code #x20000 #x2FFFD)    ; plane 2
        (between? code #x30000 #x3FFFD)))  ; plane 3

  (let ((listed (get-prop eastasian-charsets char)))
    (cond
     (listed listed)
     ((fallback-wide? (char->integer char)) 'doublewidth)
     (else 'neutral))))

View file

@ -1,76 +1,24 @@
;; Code generated by scripts/generate-east-asian. DO NOT EDIT
(define-module
(runewidth eastasian)
(uniseg eastasian)
#:use-module
(ice-9 hash-table)
#:use-module
(srfi srfi-1)
#:use-module
(runewidth internal)
(uniseg internal)
#:export
(char-set:eastasian-combining
char-set:eastasian-doublewidth
char-set:eastasian-halfwidth
char-set:eastasian-narrow
char-set:eastasian-neutral
char-set:eastasian-ambiguous))
char-set:eastasian-ambiguous
eastasian-charsets))
(define eastasian-ht
(alist->hashq-table
'((combining
(125136 125142)
(122918 122922)
(122915 122916)
(122907 122913)
(122888 122904)
(122880 122886)
(119362 119364)
(119210 119213)
(119173 119179)
(119163 119170)
(119149 119154)
(119143 119145)
(119141 119142)
(92912 92916)
(70512 70516)
(70502 70508)
(70459 70460)
(70400 70401)
(69446 69456)
(69291 69292)
(66422 66426)
(66045 66045)
(65056 65071)
(43232 43249)
(42736 42737)
(42654 42655)
(42612 42621)
(42608 42610)
(42607 42607)
(12441 12442)
(11744 11775)
(11503 11505)
(8421 8432)
(8418 8420)
(8417 8417)
(8413 8416)
(8400 8412)
(7675 7679)
(7616 7673)
(7019 7027)
(6847 6848)
(6846 6846)
(6832 6845)
(6783 6783)
(4957 4959)
(3328 3329)
(3076 3076)
(3072 3072)
(2027 2035)
(1160 1161)
(1155 1159)
(768 879))
(ambiguous
'((ambiguous
(1048576 1114109)
(983040 1048573)
(917760 917999)
@ -329,6 +277,336 @@
(36 36)
(33 35)
(32 32))
(doublewidth
(201547 262141)
(196608 201546)
(195104 196605)
(195102 195103)
(194560 195101)
(191457 194559)
(183984 191456)
(183970 183983)
(178208 183969)
(178206 178207)
(177984 178205)
(177973 177983)
(173824 177972)
(173790 173823)
(131072 173789)
(129744 129750)
(129728 129730)
(129712 129718)
(129680 129704)
(129664 129670)
(129656 129658)
(129648 129652)
(129485 129535)
(129402 129483)
(129351 129400)
(129340 129349)
(129292 129338)
(128992 129003)
(128756 128764)
(128747 128748)
(128725 128727)
(128720 128722)
(128716 128716)
(128640 128709)
(128512 128591)
(128507 128511)
(128420 128420)
(128405 128406)
(128378 128378)
(128336 128359)
(128331 128334)
(128255 128317)
(128066 128252)
(128064 128064)
(128000 128062)
(127995 127999)
(127992 127994)
(127988 127988)
(127968 127984)
(127951 127955)
(127904 127946)
(127870 127891)
(127799 127868)
(127789 127797)
(127744 127776)
(127584 127589)
(127568 127569)
(127552 127560)
(127504 127547)
(127488 127490)
(127377 127386)
(127374 127374)
(127183 127183)
(126980 126980)
(110960 111355)
(110948 110951)
(110928 110930)
(110848 110878)
(110592 110847)
(101632 101640)
(101120 101589)
(100352 101119)
(94208 100343)
(94192 94193)
(94180 94180)
(94179 94179)
(94178 94178)
(94176 94177)
(65509 65510)
(65508 65508)
(65507 65507)
(65506 65506)
(65504 65505)
(65376 65376)
(65375 65375)
(65374 65374)
(65373 65373)
(65372 65372)
(65371 65371)
(65345 65370)
(65344 65344)
(65343 65343)
(65342 65342)
(65341 65341)
(65340 65340)
(65339 65339)
(65313 65338)
(65311 65312)
(65308 65310)
(65306 65307)
(65296 65305)
(65294 65295)
(65293 65293)
(65292 65292)
(65291 65291)
(65290 65290)
(65289 65289)
(65288 65288)
(65285 65287)
(65284 65284)
(65281 65283)
(65130 65131)
(65129 65129)
(65128 65128)
(65124 65126)
(65123 65123)
(65122 65122)
(65119 65121)
(65118 65118)
(65117 65117)
(65116 65116)
(65115 65115)
(65114 65114)
(65113 65113)
(65112 65112)
(65108 65111)
(65104 65106)
(65101 65103)
(65097 65100)
(65096 65096)
(65095 65095)
(65093 65094)
(65092 65092)
(65091 65091)
(65090 65090)
(65089 65089)
(65088 65088)
(65087 65087)
(65086 65086)
(65085 65085)
(65084 65084)
(65083 65083)
(65082 65082)
(65081 65081)
(65080 65080)
(65079 65079)
(65078 65078)
(65077 65077)
(65075 65076)
(65073 65074)
(65072 65072)
(65049 65049)
(65048 65048)
(65047 65047)
(65040 65046)
(64218 64255)
(64112 64217)
(64110 64111)
(63744 64109)
(44032 55203)
(43360 43388)
(42128 42182)
(40982 42124)
(40981 40981)
(40960 40980)
(40957 40959)
(19968 40956)
(13312 19903)
(13056 13311)
(12992 13055)
(12977 12991)
(12938 12976)
(12928 12937)
(12896 12927)
(12881 12895)
(12880 12880)
(12842 12871)
(12832 12841)
(12800 12830)
(12784 12799)
(12736 12771)
(12704 12735)
(12694 12703)
(12690 12693)
(12688 12689)
(12593 12686)
(12549 12591)
(12543 12543)
(12540 12542)
(12539 12539)
(12449 12538)
(12448 12448)
(12447 12447)
(12445 12446)
(12443 12444)
(12353 12438)
(12350 12350)
(12349 12349)
(12348 12348)
(12347 12347)
(12344 12346)
(12342 12343)
(12337 12341)
(12336 12336)
(12334 12335)
(12330 12333)
(12321 12329)
(12320 12320)
(12318 12319)
(12317 12317)
(12316 12316)
(12315 12315)
(12314 12314)
(12313 12313)
(12312 12312)
(12311 12311)
(12310 12310)
(12309 12309)
(12308 12308)
(12306 12307)
(12305 12305)
(12304 12304)
(12303 12303)
(12302 12302)
(12301 12301)
(12300 12300)
(12299 12299)
(12298 12298)
(12297 12297)
(12296 12296)
(12295 12295)
(12294 12294)
(12293 12293)
(12292 12292)
(12289 12291)
(12288 12288)
(12272 12283)
(12032 12245)
(11931 12019)
(11904 11929)
(11093 11093)
(11088 11088)
(11035 11036)
(10175 10175)
(10160 10160)
(10133 10135)
(10071 10071)
(10067 10069)
(10062 10062)
(10060 10060)
(10024 10024)
(9994 9995)
(9989 9989)
(9981 9981)
(9978 9978)
(9973 9973)
(9970 9971)
(9962 9962)
(9940 9940)
(9934 9934)
(9924 9925)
(9917 9918)
(9898 9899)
(9889 9889)
(9875 9875)
(9855 9855)
(9800 9811)
(9748 9749)
(9725 9726)
(9203 9203)
(9200 9200)
(9193 9196)
(9002 9002)
(9001 9001)
(8986 8987)
(4352 4447))
(combining
(125136 125142)
(122918 122922)
(122915 122916)
(122907 122913)
(122888 122904)
(122880 122886)
(119362 119364)
(119210 119213)
(119173 119179)
(119163 119170)
(119149 119154)
(119143 119145)
(119141 119142)
(92912 92916)
(70512 70516)
(70502 70508)
(70459 70460)
(70400 70401)
(69446 69456)
(69291 69292)
(66422 66426)
(66045 66045)
(65056 65071)
(43232 43249)
(42736 42737)
(42654 42655)
(42612 42621)
(42608 42610)
(42607 42607)
(12441 12442)
(11744 11775)
(11503 11505)
(8421 8432)
(8418 8420)
(8417 8417)
(8413 8416)
(8400 8412)
(7675 7679)
(7616 7673)
(7019 7027)
(6847 6848)
(6846 6846)
(6832 6845)
(6783 6783)
(4957 4959)
(3328 3329)
(3076 3076)
(3072 3072)
(2027 2035)
(1160 1161)
(1155 1159)
(768 879))
(neutral
(917536 917631)
(917505 917505)
@ -2220,284 +2498,7 @@
(160 160)
(128 159)
(127 127)
(0 31))
(doublewidth
(201547 262141)
(196608 201546)
(195104 196605)
(195102 195103)
(194560 195101)
(191457 194559)
(183984 191456)
(183970 183983)
(178208 183969)
(178206 178207)
(177984 178205)
(177973 177983)
(173824 177972)
(173790 173823)
(131072 173789)
(129744 129750)
(129728 129730)
(129712 129718)
(129680 129704)
(129664 129670)
(129656 129658)
(129648 129652)
(129485 129535)
(129402 129483)
(129351 129400)
(129340 129349)
(129292 129338)
(128992 129003)
(128756 128764)
(128747 128748)
(128725 128727)
(128720 128722)
(128716 128716)
(128640 128709)
(128512 128591)
(128507 128511)
(128420 128420)
(128405 128406)
(128378 128378)
(128336 128359)
(128331 128334)
(128255 128317)
(128066 128252)
(128064 128064)
(128000 128062)
(127995 127999)
(127992 127994)
(127988 127988)
(127968 127984)
(127951 127955)
(127904 127946)
(127870 127891)
(127799 127868)
(127789 127797)
(127744 127776)
(127584 127589)
(127568 127569)
(127552 127560)
(127504 127547)
(127488 127490)
(127377 127386)
(127374 127374)
(127183 127183)
(126980 126980)
(110960 111355)
(110948 110951)
(110928 110930)
(110848 110878)
(110592 110847)
(101632 101640)
(101120 101589)
(100352 101119)
(94208 100343)
(94192 94193)
(94180 94180)
(94179 94179)
(94178 94178)
(94176 94177)
(65509 65510)
(65508 65508)
(65507 65507)
(65506 65506)
(65504 65505)
(65376 65376)
(65375 65375)
(65374 65374)
(65373 65373)
(65372 65372)
(65371 65371)
(65345 65370)
(65344 65344)
(65343 65343)
(65342 65342)
(65341 65341)
(65340 65340)
(65339 65339)
(65313 65338)
(65311 65312)
(65308 65310)
(65306 65307)
(65296 65305)
(65294 65295)
(65293 65293)
(65292 65292)
(65291 65291)
(65290 65290)
(65289 65289)
(65288 65288)
(65285 65287)
(65284 65284)
(65281 65283)
(65130 65131)
(65129 65129)
(65128 65128)
(65124 65126)
(65123 65123)
(65122 65122)
(65119 65121)
(65118 65118)
(65117 65117)
(65116 65116)
(65115 65115)
(65114 65114)
(65113 65113)
(65112 65112)
(65108 65111)
(65104 65106)
(65101 65103)
(65097 65100)
(65096 65096)
(65095 65095)
(65093 65094)
(65092 65092)
(65091 65091)
(65090 65090)
(65089 65089)
(65088 65088)
(65087 65087)
(65086 65086)
(65085 65085)
(65084 65084)
(65083 65083)
(65082 65082)
(65081 65081)
(65080 65080)
(65079 65079)
(65078 65078)
(65077 65077)
(65075 65076)
(65073 65074)
(65072 65072)
(65049 65049)
(65048 65048)
(65047 65047)
(65040 65046)
(64218 64255)
(64112 64217)
(64110 64111)
(63744 64109)
(44032 55203)
(43360 43388)
(42128 42182)
(40982 42124)
(40981 40981)
(40960 40980)
(40957 40959)
(19968 40956)
(13312 19903)
(13056 13311)
(12992 13055)
(12977 12991)
(12938 12976)
(12928 12937)
(12896 12927)
(12881 12895)
(12880 12880)
(12842 12871)
(12832 12841)
(12800 12830)
(12784 12799)
(12736 12771)
(12704 12735)
(12694 12703)
(12690 12693)
(12688 12689)
(12593 12686)
(12549 12591)
(12543 12543)
(12540 12542)
(12539 12539)
(12449 12538)
(12448 12448)
(12447 12447)
(12445 12446)
(12443 12444)
(12353 12438)
(12350 12350)
(12349 12349)
(12348 12348)
(12347 12347)
(12344 12346)
(12342 12343)
(12337 12341)
(12336 12336)
(12334 12335)
(12330 12333)
(12321 12329)
(12320 12320)
(12318 12319)
(12317 12317)
(12316 12316)
(12315 12315)
(12314 12314)
(12313 12313)
(12312 12312)
(12311 12311)
(12310 12310)
(12309 12309)
(12308 12308)
(12306 12307)
(12305 12305)
(12304 12304)
(12303 12303)
(12302 12302)
(12301 12301)
(12300 12300)
(12299 12299)
(12298 12298)
(12297 12297)
(12296 12296)
(12295 12295)
(12294 12294)
(12293 12293)
(12292 12292)
(12289 12291)
(12288 12288)
(12272 12283)
(12032 12245)
(11931 12019)
(11904 11929)
(11093 11093)
(11088 11088)
(11035 11036)
(10175 10175)
(10160 10160)
(10133 10135)
(10071 10071)
(10067 10069)
(10062 10062)
(10060 10060)
(10024 10024)
(9994 9995)
(9989 9989)
(9981 9981)
(9978 9978)
(9973 9973)
(9970 9971)
(9962 9962)
(9940 9940)
(9934 9934)
(9924 9925)
(9917 9918)
(9898 9899)
(9889 9889)
(9875 9875)
(9855 9855)
(9800 9811)
(9748 9749)
(9725 9726)
(9203 9203)
(9200 9200)
(9193 9196)
(9002 9002)
(9001 9001)
(8986 8987)
(4352 4447)))))
(0 31)))))
(define char-set:eastasian-combining (char-set))
(define char-set:eastasian-doublewidth
@ -2507,6 +2508,15 @@
(define char-set:eastasian-neutral (char-set))
(define char-set:eastasian-ambiguous (char-set))
(define eastasian-charsets
(list (list 'combining char-set:eastasian-combining)
(list 'doublewidth
char-set:eastasian-doublewidth)
(list 'halfwidth char-set:eastasian-halfwidth)
(list 'narrow char-set:eastasian-narrow)
(list 'neutral char-set:eastasian-neutral)
(list 'ambiguous char-set:eastasian-ambiguous)))
(ranges->charset!
eastasian-ht
'combining

View file

@ -1,4 +1,4 @@
(define-module (runewidth eastasian locale)
(define-module (uniseg eastasian locale)
#:use-module (ice-9 regex)
#:export (eastasian-locale?))
@ -21,7 +21,7 @@
"gb2312"))
;; algorithm from:
;; https://github.com/mattn/go-runewidth/blob/master/runewidth_posix.go
;; https://github.com/mattn/go-runewidth/blob/master/runewidth_posix.go
;; For extracting the charset part of the locale string (some locales require this)
;; Note regex and capture group different as guile does not support 'non-capturing group' syntax

View file

@ -1,11 +1,11 @@
;; Code generated by scripts/generate-emoji. DO NOT EDIT
(define-module
(runewidth emoji)
(uniseg emoji)
#:use-module
(srfi srfi-1)
#:export
(char-set:emoji))
(char-set:extended-pictographic))
(define emoji-list
'((130048 131069)
(129751 129791)
@ -497,7 +497,8 @@
(8265 8265)
(8252 8252)))
(define char-set:emoji (char-set))
(define char-set:extended-pictographic
(char-set))
(for-each
(λ (pair)
@ -505,5 +506,5 @@
(first pair)
(+ 1 (second pair))
#t
char-set:emoji))
char-set:extended-pictographic))
emoji-list)

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,22 @@
;; Module declaration for the grapheme stream helpers.  The module name
;; uses the `uniseg' prefix so that it matches the renamed library
;; directory this file is registered under.
(define-module (uniseg graphemes stream)
  #:use-module (srfi srfi-41)
  #:use-module (srfi srfi-9 gnu)
  #:export (make-grapheme
            input->grapheme-stream))
;; Immutable record type <grapheme>.  The constructor `make-grapheme' takes
;; its fields in the order glyphs, width, sentence-end?, word-end?; the
;; predicate is `grapheme?' and each field has a `grapheme-...' accessor.
;; NOTE(review): the field meanings are only implied by their names here
;; (presumably the characters making up the grapheme, its display width,
;; and sentence/word boundary flags) -- confirm against the callers.
(define-immutable-record-type <grapheme>
(make-grapheme glyphs width sentence-end? word-end?)
grapheme?
(glyphs grapheme-glyphs)
(width grapheme-width)
(sentence-end? grapheme-sentence-end?)
(word-end? grapheme-word-end?))
(define-stream (input->grapheme-stream port)
  ;; Lazily read characters from PORT and produce a stream of <grapheme>
  ;; records.  When PORT is #f the current input port is used.  The stream
  ;; ends (stream-null) once the port reaches end of file.
  ;;
  ;; The previous body referenced identifiers that were never bound
  ;; (glyphs, width, sentence-end?, word-end?, c) and never read from the
  ;; port, so forcing the stream raised an unbound-variable error.
  ;;
  ;; TODO: this is a placeholder segmentation.  Each character becomes its
  ;; own grapheme, and the width and the sentence/word boundary flags are
  ;; left as #f until real cluster segmentation and width calculation are
  ;; implemented.
  (let* ((in (or port (current-input-port)))
         (c (read-char in)))
    (if (eof-object? c)
        stream-null
        (stream-cons (make-grapheme (list c) #f #f #f)
                     (input->grapheme-stream in)))))

View file

@ -1,5 +1,5 @@
(define-module
(runewidth hconfig)
(uniseg hconfig)
#:use-module
(srfi srfi-26)
#:export
@ -21,7 +21,7 @@
(define %copyright '(2024))
(define %gettext-domain "guile-runewidth")
(define %gettext-domain "guile-uniseg")
(define G_ identity)

View file

@ -1,4 +1,4 @@
(define-module (runewidth internal)
(define-module (uniseg internal)
#:use-module (ice-9 peg)
#:use-module (ice-9 textual-ports)
#:use-module (ice-9 exceptions)