Renaming to uniseg
This commit is contained in:
parent
4a93c70b79
commit
f1c03097f1
18 changed files with 1143 additions and 971 deletions
4
HACKING
4
HACKING
|
@ -1,10 +1,10 @@
|
|||
# -*- mode: org; coding: utf-8; -*-
|
||||
|
||||
#+TITLE: Hacking guile-runewidth
|
||||
#+TITLE: Hacking guile-uniseg
|
||||
|
||||
* Contributing
|
||||
|
||||
By far the easiest way to hack on guile-runewidth is to develop using Guix:
|
||||
By far the easiest way to hack on guile-uniseg is to develop using Guix:
|
||||
|
||||
#+BEGIN_SRC bash
|
||||
# Obtain the source code
|
||||
|
|
|
@ -1,14 +1,14 @@
|
|||
# -*- mode: org; coding: utf-8; -*-
|
||||
|
||||
#+TITLE: README for Guile Runewidth
|
||||
#+TITLE: README for Guile Uniseg
|
||||
|
||||
~guile-runewidth~ is a simple Guile library that provides several new Guile character sets for categorizing
|
||||
~guile-uniseg~ is a simple Guile library that provides several new Guile character sets for categorizing
|
||||
East Asian characters and emoji. This can be useful when determining the width of characters.
|
||||
|
||||
This library is heavily inspired by the golang library ~runewidth~.
|
||||
This library is heavily inspired by the golang libraries ~runewidth~ and ~uniseg~.
|
||||
|
||||
* Scripts
|
||||
There are two scripts which can be used to regenerate the character set files. These are optional to run, since
|
||||
the script's output is stored in the repository at ~runewidth/eastasian.scm~ and ~runewidth/emoji.scm~.
|
||||
the script's output is stored in the repository at ~uniseg/eastasian.scm~ and ~uniseg/emoji.scm~.
|
||||
|
||||
However, should the source change as the unicode spec changes, these can be refreshed.
|
||||
|
|
|
@ -27,7 +27,7 @@ Documentation License''.
|
|||
|
||||
@titlepage
|
||||
@title The Reflow Manual
|
||||
@author
|
||||
@author Vivianne Langdon
|
||||
|
||||
@page
|
||||
@vskip 0pt plus 1filll
|
4
guix.scm
4
guix.scm
|
@ -13,7 +13,7 @@
|
|||
(srfi srfi-1))
|
||||
|
||||
(package
|
||||
(name "guile-runewidth")
|
||||
(name "guile-uniseg")
|
||||
(version "0.1")
|
||||
(source
|
||||
(local-file
|
||||
|
@ -92,6 +92,6 @@
|
|||
(description
|
||||
"A library that provides guile character sets and operations to work with runes that are more than a single character width.")
|
||||
(home-page
|
||||
"https://git.solarpunk.moe/vv/guile-runewidth")
|
||||
"https://git.solarpunk.moe/vv/guile-uniseg")
|
||||
(license license:gpl3+))
|
||||
|
||||
|
|
11
hall.scm
11
hall.scm
|
@ -1,5 +1,5 @@
|
|||
(hall-description
|
||||
(name "runewidth")
|
||||
(name "uniseg")
|
||||
(prefix "guile")
|
||||
(version "0.1")
|
||||
(author "Vivianne Langdon")
|
||||
|
@ -9,7 +9,7 @@
|
|||
(description
|
||||
"A library that provides guile character sets and operations to work with runes that are more than a single character width.")
|
||||
(home-page
|
||||
"https://git.solarpunk.moe/vv/guile-runewidth")
|
||||
"https://git.solarpunk.moe/vv/guile-uniseg")
|
||||
(license gpl3+)
|
||||
(dependencies `())
|
||||
(skip ())
|
||||
|
@ -19,12 +19,13 @@
|
|||
(native-language-support #f)
|
||||
(licensing #f)))
|
||||
(files (libraries
|
||||
((scheme-file "runewidth")
|
||||
((scheme-file "uniseg")
|
||||
(directory
|
||||
"runewidth"
|
||||
"uniseg"
|
||||
((scheme-file "emoji")
|
||||
(directory "eastasian" ((scheme-file "locale")))
|
||||
(scheme-file "eastasian")
|
||||
(directory "graphemes" ((scheme-file "stream")))
|
||||
(scheme-file "graphemes")
|
||||
(scheme-file "internal")))))
|
||||
(tests ((directory
|
||||
|
@ -41,7 +42,7 @@
|
|||
(symlink "README" "README.org")
|
||||
(text-file "HACKING")
|
||||
(text-file "COPYING")
|
||||
(directory "doc" ((texi-file "guile-runewidth")))))
|
||||
(directory "doc" ((texi-file "guile-uniseg")))))
|
||||
(infrastructure
|
||||
((scheme-file "guix")
|
||||
(text-file ".gitignore")
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
;; Can be called with a trailing argument pointing to the file on disk.
|
||||
|
||||
(use-modules
|
||||
(runewidth internal)
|
||||
(uniseg internal)
|
||||
(ice-9 pretty-print)
|
||||
(ice-9 peg)
|
||||
(ice-9 format)
|
||||
|
@ -97,7 +97,7 @@
|
|||
(λ ()
|
||||
(wget-to-lines east-asian-url stdout))))
|
||||
|
||||
(define file "runewidth/eastasian.scm")
|
||||
(define file "uniseg/eastasian.scm")
|
||||
|
||||
(format stdout "Writing to ~a...\n" file)
|
||||
|
||||
|
@ -108,12 +108,13 @@
|
|||
(for-each process-east-asian-line (line-func))
|
||||
|
||||
(pretty-print
|
||||
`(define-module (runewidth eastasian)
|
||||
`(define-module (uniseg eastasian)
|
||||
#:use-module (ice-9 hash-table)
|
||||
#:use-module (srfi srfi-1)
|
||||
#:use-module (runewidth internal)
|
||||
#:use-module (uniseg internal)
|
||||
#:export
|
||||
,ea-symbol-names))
|
||||
(,@ea-symbol-names
|
||||
eastasian-charsets)))
|
||||
|
||||
(pretty-print
|
||||
`(define eastasian-ht
|
||||
|
@ -129,6 +130,18 @@
|
|||
|
||||
(display "\n")
|
||||
|
||||
(pretty-print
|
||||
`(define eastasian-charsets
|
||||
(list
|
||||
,@(map
|
||||
(λ (pair)
|
||||
(let ((f (first pair))
|
||||
(s (second pair)))
|
||||
`(list ',f ,s)))
|
||||
ea-sets-and-symbols))))
|
||||
|
||||
(display "\n")
|
||||
|
||||
(for-each
|
||||
(λ (set-pair)
|
||||
(let ((name (first set-pair))
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
;; Can be called with a trailing argument pointing to the file on disk.
|
||||
|
||||
(use-modules
|
||||
(runewidth internal)
|
||||
(uniseg internal)
|
||||
(ice-9 pretty-print)
|
||||
(ice-9 peg)
|
||||
(ice-9 format)
|
||||
|
@ -66,7 +66,7 @@
|
|||
(λ ()
|
||||
(wget-to-lines emoji-url stdout))))
|
||||
|
||||
(define file "runewidth/emoji.scm")
|
||||
(define file "uniseg/emoji.scm")
|
||||
|
||||
(format stdout "Writing to ~a...\n" file)
|
||||
|
||||
|
@ -77,9 +77,9 @@
|
|||
(for-each process-emoji-line (line-func))
|
||||
|
||||
(pretty-print
|
||||
`(define-module (runewidth emoji)
|
||||
`(define-module (uniseg emoji)
|
||||
#:use-module (srfi srfi-1)
|
||||
#:export (char-set:emoji)))
|
||||
#:export (char-set:extended-pictographic)))
|
||||
|
||||
(pretty-print
|
||||
`(define emoji-list ',emoji-list))
|
||||
|
@ -87,7 +87,7 @@
|
|||
(display "\n")
|
||||
|
||||
(pretty-print
|
||||
`(define char-set:emoji (char-set)))
|
||||
`(define char-set:extended-pictographic (char-set)))
|
||||
|
||||
(display "\n")
|
||||
|
||||
|
@ -97,7 +97,7 @@
|
|||
(ucs-range->char-set!
|
||||
(first pair)
|
||||
(+ 1 (second pair))
|
||||
#t char-set:emoji))
|
||||
#t char-set:extended-pictographic))
|
||||
emoji-list))
|
||||
|
||||
(display "Code generation complete.\n" stdout)))
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
;; Can be called with a trailing argument pointing to the file on disk.
|
||||
|
||||
(use-modules
|
||||
(runewidth internal)
|
||||
(uniseg internal)
|
||||
(ice-9 pretty-print)
|
||||
(ice-9 peg)
|
||||
(ice-9 format)
|
||||
|
@ -108,7 +108,7 @@
|
|||
(λ ()
|
||||
(wget-to-lines grapheme-url stdout))))
|
||||
|
||||
(define file "runewidth/graphemes.scm")
|
||||
(define file "uniseg/graphemes.scm")
|
||||
|
||||
(format stdout "Writing to ~a...\n" file)
|
||||
|
||||
|
@ -119,11 +119,12 @@
|
|||
(for-each process-grapheme-line (line-func))
|
||||
|
||||
(pretty-print
|
||||
`(define-module (runewidth graphemes)
|
||||
`(define-module (uniseg graphemes)
|
||||
#:use-module (ice-9 hash-table)
|
||||
#:use-module (srfi srfi-1)
|
||||
#:use-module (runewidth internal)
|
||||
#:export ,grapheme-symbol-names))
|
||||
#:use-module (uniseg internal)
|
||||
#:export ( ,@grapheme-symbol-names
|
||||
grapheme-charsets)))
|
||||
|
||||
|
||||
(pretty-print
|
||||
|
@ -132,6 +133,26 @@
|
|||
|
||||
(display "\n")
|
||||
|
||||
(for-each
|
||||
(λ (sym)
|
||||
(pretty-print
|
||||
`(define ,sym (char-set))))
|
||||
grapheme-symbol-names)
|
||||
|
||||
(display "\n")
|
||||
|
||||
(pretty-print
|
||||
`(define grapheme-charsets
|
||||
(list
|
||||
,@(map
|
||||
(λ (pair)
|
||||
(let ((f (first pair))
|
||||
(s (second pair)))
|
||||
`(list ',f ,s)))
|
||||
grapheme-sets-and-symbols))))
|
||||
|
||||
(display "\n")
|
||||
|
||||
(for-each
|
||||
(λ (set-pair)
|
||||
(let ((name (first set-pair))
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
(define-module (tests test-eastasian-locale)
|
||||
#:use-module (runewidth eastasian locale)
|
||||
#:use-module (uniseg eastasian locale)
|
||||
#:use-module (srfi srfi-64))
|
||||
|
||||
(test-begin "test-eastasian-locale")
|
||||
|
|
60
uniseg.scm
Normal file
60
uniseg.scm
Normal file
|
@ -0,0 +1,60 @@
|
|||
(define-module (uniseg)
|
||||
#:use-module (srfi srfi-1)
|
||||
#:use-module (ice-9 match)
|
||||
#:use-module (uniseg emoji)
|
||||
#:use-module (uniseg graphemes)
|
||||
#:use-module (uniseg eastasian)
|
||||
#:export (emoji?
|
||||
grapheme-property
|
||||
eastasian-property))
|
||||
|
||||
;; Predicate: return #t when CHAR is a member of
;; char-set:extended-pictographic (imported from (uniseg emoji)),
;; and #f otherwise.
(define (emoji? char)
|
||||
(char-set-contains? char-set:extended-pictographic char))
|
||||
|
||||
;; Look up the property symbol of CHAR in SETS.
;;
;; SETS is a list of two-element lists (SYMBOL CHAR-SET).  The entries
;; are scanned in order; the symbol of the first entry whose char-set
;; contains CHAR is returned.  Returns #f when no char-set matches.
(define (get-prop sets char)
  (and=> (find (λ (entry)
                 (char-set-contains? (second entry) char))
               sets)
         first))
|
||||
|
||||
(define (grapheme-property char)
  "Find the unicode grapheme cluster property, as defined by https://www.unicode.org/reports/tr44/"
  ;; Characters that appear in none of the generated grapheme
  ;; char-sets fall back to the symbol 'other.
  (let ((prop (get-prop grapheme-charsets char)))
    (if prop
        prop
        'other)))
|
||||
|
||||
|
||||
;; Return the East Asian width property symbol for CHAR.
;;
;; The generated table (eastasian-charsets) is consulted first.  For a
;; character that is in none of its char-sets, the result is
;; 'doublewidth when the code point lies in one of the fallback ranges
;; below, and 'neutral otherwise.
(define (eastasian-property char)
  ;; The spec defines any values not in the table to be wide if they
  ;; fall in certain ranges.  These characters are not in the char-sets
  ;; as they are undesignated.  See the comments in unicode's
  ;; EastAsianWidth.txt.  Each entry is an inclusive (LO . HI) pair.
  (define wide-fallback-ranges
    '((#x3400  . #x4DBF)      ; unified ideographs extension A
      (#x4E00  . #x9FFF)      ; unified ideographs
      (#xF900  . #xFAFF)      ; compatibility ideographs
      (#x20000 . #x2FFFD)     ; plane 2
      (#x30000 . #x3FFFD)))   ; plane 3

  (define (in-wide-fallback-range? code)
    (any (λ (range)
           (<= (car range) code (cdr range)))
         wide-fallback-ranges))

  (or (get-prop eastasian-charsets char)
      (if (in-wide-fallback-range? (char->integer char))
          'doublewidth
          'neutral)))
|
|
@ -1,76 +1,24 @@
|
|||
;; Code generated by scripts/generate-east-asian. DO NOT EDIT
|
||||
|
||||
(define-module
|
||||
(runewidth eastasian)
|
||||
(uniseg eastasian)
|
||||
#:use-module
|
||||
(ice-9 hash-table)
|
||||
#:use-module
|
||||
(srfi srfi-1)
|
||||
#:use-module
|
||||
(runewidth internal)
|
||||
(uniseg internal)
|
||||
#:export
|
||||
(char-set:eastasian-combining
|
||||
char-set:eastasian-doublewidth
|
||||
char-set:eastasian-halfwidth
|
||||
char-set:eastasian-narrow
|
||||
char-set:eastasian-neutral
|
||||
char-set:eastasian-ambiguous))
|
||||
char-set:eastasian-ambiguous
|
||||
eastasian-charsets))
|
||||
(define eastasian-ht
|
||||
(alist->hashq-table
|
||||
'((combining
|
||||
(125136 125142)
|
||||
(122918 122922)
|
||||
(122915 122916)
|
||||
(122907 122913)
|
||||
(122888 122904)
|
||||
(122880 122886)
|
||||
(119362 119364)
|
||||
(119210 119213)
|
||||
(119173 119179)
|
||||
(119163 119170)
|
||||
(119149 119154)
|
||||
(119143 119145)
|
||||
(119141 119142)
|
||||
(92912 92916)
|
||||
(70512 70516)
|
||||
(70502 70508)
|
||||
(70459 70460)
|
||||
(70400 70401)
|
||||
(69446 69456)
|
||||
(69291 69292)
|
||||
(66422 66426)
|
||||
(66045 66045)
|
||||
(65056 65071)
|
||||
(43232 43249)
|
||||
(42736 42737)
|
||||
(42654 42655)
|
||||
(42612 42621)
|
||||
(42608 42610)
|
||||
(42607 42607)
|
||||
(12441 12442)
|
||||
(11744 11775)
|
||||
(11503 11505)
|
||||
(8421 8432)
|
||||
(8418 8420)
|
||||
(8417 8417)
|
||||
(8413 8416)
|
||||
(8400 8412)
|
||||
(7675 7679)
|
||||
(7616 7673)
|
||||
(7019 7027)
|
||||
(6847 6848)
|
||||
(6846 6846)
|
||||
(6832 6845)
|
||||
(6783 6783)
|
||||
(4957 4959)
|
||||
(3328 3329)
|
||||
(3076 3076)
|
||||
(3072 3072)
|
||||
(2027 2035)
|
||||
(1160 1161)
|
||||
(1155 1159)
|
||||
(768 879))
|
||||
(ambiguous
|
||||
'((ambiguous
|
||||
(1048576 1114109)
|
||||
(983040 1048573)
|
||||
(917760 917999)
|
||||
|
@ -329,6 +277,336 @@
|
|||
(36 36)
|
||||
(33 35)
|
||||
(32 32))
|
||||
(doublewidth
|
||||
(201547 262141)
|
||||
(196608 201546)
|
||||
(195104 196605)
|
||||
(195102 195103)
|
||||
(194560 195101)
|
||||
(191457 194559)
|
||||
(183984 191456)
|
||||
(183970 183983)
|
||||
(178208 183969)
|
||||
(178206 178207)
|
||||
(177984 178205)
|
||||
(177973 177983)
|
||||
(173824 177972)
|
||||
(173790 173823)
|
||||
(131072 173789)
|
||||
(129744 129750)
|
||||
(129728 129730)
|
||||
(129712 129718)
|
||||
(129680 129704)
|
||||
(129664 129670)
|
||||
(129656 129658)
|
||||
(129648 129652)
|
||||
(129485 129535)
|
||||
(129402 129483)
|
||||
(129351 129400)
|
||||
(129340 129349)
|
||||
(129292 129338)
|
||||
(128992 129003)
|
||||
(128756 128764)
|
||||
(128747 128748)
|
||||
(128725 128727)
|
||||
(128720 128722)
|
||||
(128716 128716)
|
||||
(128640 128709)
|
||||
(128512 128591)
|
||||
(128507 128511)
|
||||
(128420 128420)
|
||||
(128405 128406)
|
||||
(128378 128378)
|
||||
(128336 128359)
|
||||
(128331 128334)
|
||||
(128255 128317)
|
||||
(128066 128252)
|
||||
(128064 128064)
|
||||
(128000 128062)
|
||||
(127995 127999)
|
||||
(127992 127994)
|
||||
(127988 127988)
|
||||
(127968 127984)
|
||||
(127951 127955)
|
||||
(127904 127946)
|
||||
(127870 127891)
|
||||
(127799 127868)
|
||||
(127789 127797)
|
||||
(127744 127776)
|
||||
(127584 127589)
|
||||
(127568 127569)
|
||||
(127552 127560)
|
||||
(127504 127547)
|
||||
(127488 127490)
|
||||
(127377 127386)
|
||||
(127374 127374)
|
||||
(127183 127183)
|
||||
(126980 126980)
|
||||
(110960 111355)
|
||||
(110948 110951)
|
||||
(110928 110930)
|
||||
(110848 110878)
|
||||
(110592 110847)
|
||||
(101632 101640)
|
||||
(101120 101589)
|
||||
(100352 101119)
|
||||
(94208 100343)
|
||||
(94192 94193)
|
||||
(94180 94180)
|
||||
(94179 94179)
|
||||
(94178 94178)
|
||||
(94176 94177)
|
||||
(65509 65510)
|
||||
(65508 65508)
|
||||
(65507 65507)
|
||||
(65506 65506)
|
||||
(65504 65505)
|
||||
(65376 65376)
|
||||
(65375 65375)
|
||||
(65374 65374)
|
||||
(65373 65373)
|
||||
(65372 65372)
|
||||
(65371 65371)
|
||||
(65345 65370)
|
||||
(65344 65344)
|
||||
(65343 65343)
|
||||
(65342 65342)
|
||||
(65341 65341)
|
||||
(65340 65340)
|
||||
(65339 65339)
|
||||
(65313 65338)
|
||||
(65311 65312)
|
||||
(65308 65310)
|
||||
(65306 65307)
|
||||
(65296 65305)
|
||||
(65294 65295)
|
||||
(65293 65293)
|
||||
(65292 65292)
|
||||
(65291 65291)
|
||||
(65290 65290)
|
||||
(65289 65289)
|
||||
(65288 65288)
|
||||
(65285 65287)
|
||||
(65284 65284)
|
||||
(65281 65283)
|
||||
(65130 65131)
|
||||
(65129 65129)
|
||||
(65128 65128)
|
||||
(65124 65126)
|
||||
(65123 65123)
|
||||
(65122 65122)
|
||||
(65119 65121)
|
||||
(65118 65118)
|
||||
(65117 65117)
|
||||
(65116 65116)
|
||||
(65115 65115)
|
||||
(65114 65114)
|
||||
(65113 65113)
|
||||
(65112 65112)
|
||||
(65108 65111)
|
||||
(65104 65106)
|
||||
(65101 65103)
|
||||
(65097 65100)
|
||||
(65096 65096)
|
||||
(65095 65095)
|
||||
(65093 65094)
|
||||
(65092 65092)
|
||||
(65091 65091)
|
||||
(65090 65090)
|
||||
(65089 65089)
|
||||
(65088 65088)
|
||||
(65087 65087)
|
||||
(65086 65086)
|
||||
(65085 65085)
|
||||
(65084 65084)
|
||||
(65083 65083)
|
||||
(65082 65082)
|
||||
(65081 65081)
|
||||
(65080 65080)
|
||||
(65079 65079)
|
||||
(65078 65078)
|
||||
(65077 65077)
|
||||
(65075 65076)
|
||||
(65073 65074)
|
||||
(65072 65072)
|
||||
(65049 65049)
|
||||
(65048 65048)
|
||||
(65047 65047)
|
||||
(65040 65046)
|
||||
(64218 64255)
|
||||
(64112 64217)
|
||||
(64110 64111)
|
||||
(63744 64109)
|
||||
(44032 55203)
|
||||
(43360 43388)
|
||||
(42128 42182)
|
||||
(40982 42124)
|
||||
(40981 40981)
|
||||
(40960 40980)
|
||||
(40957 40959)
|
||||
(19968 40956)
|
||||
(13312 19903)
|
||||
(13056 13311)
|
||||
(12992 13055)
|
||||
(12977 12991)
|
||||
(12938 12976)
|
||||
(12928 12937)
|
||||
(12896 12927)
|
||||
(12881 12895)
|
||||
(12880 12880)
|
||||
(12842 12871)
|
||||
(12832 12841)
|
||||
(12800 12830)
|
||||
(12784 12799)
|
||||
(12736 12771)
|
||||
(12704 12735)
|
||||
(12694 12703)
|
||||
(12690 12693)
|
||||
(12688 12689)
|
||||
(12593 12686)
|
||||
(12549 12591)
|
||||
(12543 12543)
|
||||
(12540 12542)
|
||||
(12539 12539)
|
||||
(12449 12538)
|
||||
(12448 12448)
|
||||
(12447 12447)
|
||||
(12445 12446)
|
||||
(12443 12444)
|
||||
(12353 12438)
|
||||
(12350 12350)
|
||||
(12349 12349)
|
||||
(12348 12348)
|
||||
(12347 12347)
|
||||
(12344 12346)
|
||||
(12342 12343)
|
||||
(12337 12341)
|
||||
(12336 12336)
|
||||
(12334 12335)
|
||||
(12330 12333)
|
||||
(12321 12329)
|
||||
(12320 12320)
|
||||
(12318 12319)
|
||||
(12317 12317)
|
||||
(12316 12316)
|
||||
(12315 12315)
|
||||
(12314 12314)
|
||||
(12313 12313)
|
||||
(12312 12312)
|
||||
(12311 12311)
|
||||
(12310 12310)
|
||||
(12309 12309)
|
||||
(12308 12308)
|
||||
(12306 12307)
|
||||
(12305 12305)
|
||||
(12304 12304)
|
||||
(12303 12303)
|
||||
(12302 12302)
|
||||
(12301 12301)
|
||||
(12300 12300)
|
||||
(12299 12299)
|
||||
(12298 12298)
|
||||
(12297 12297)
|
||||
(12296 12296)
|
||||
(12295 12295)
|
||||
(12294 12294)
|
||||
(12293 12293)
|
||||
(12292 12292)
|
||||
(12289 12291)
|
||||
(12288 12288)
|
||||
(12272 12283)
|
||||
(12032 12245)
|
||||
(11931 12019)
|
||||
(11904 11929)
|
||||
(11093 11093)
|
||||
(11088 11088)
|
||||
(11035 11036)
|
||||
(10175 10175)
|
||||
(10160 10160)
|
||||
(10133 10135)
|
||||
(10071 10071)
|
||||
(10067 10069)
|
||||
(10062 10062)
|
||||
(10060 10060)
|
||||
(10024 10024)
|
||||
(9994 9995)
|
||||
(9989 9989)
|
||||
(9981 9981)
|
||||
(9978 9978)
|
||||
(9973 9973)
|
||||
(9970 9971)
|
||||
(9962 9962)
|
||||
(9940 9940)
|
||||
(9934 9934)
|
||||
(9924 9925)
|
||||
(9917 9918)
|
||||
(9898 9899)
|
||||
(9889 9889)
|
||||
(9875 9875)
|
||||
(9855 9855)
|
||||
(9800 9811)
|
||||
(9748 9749)
|
||||
(9725 9726)
|
||||
(9203 9203)
|
||||
(9200 9200)
|
||||
(9193 9196)
|
||||
(9002 9002)
|
||||
(9001 9001)
|
||||
(8986 8987)
|
||||
(4352 4447))
|
||||
(combining
|
||||
(125136 125142)
|
||||
(122918 122922)
|
||||
(122915 122916)
|
||||
(122907 122913)
|
||||
(122888 122904)
|
||||
(122880 122886)
|
||||
(119362 119364)
|
||||
(119210 119213)
|
||||
(119173 119179)
|
||||
(119163 119170)
|
||||
(119149 119154)
|
||||
(119143 119145)
|
||||
(119141 119142)
|
||||
(92912 92916)
|
||||
(70512 70516)
|
||||
(70502 70508)
|
||||
(70459 70460)
|
||||
(70400 70401)
|
||||
(69446 69456)
|
||||
(69291 69292)
|
||||
(66422 66426)
|
||||
(66045 66045)
|
||||
(65056 65071)
|
||||
(43232 43249)
|
||||
(42736 42737)
|
||||
(42654 42655)
|
||||
(42612 42621)
|
||||
(42608 42610)
|
||||
(42607 42607)
|
||||
(12441 12442)
|
||||
(11744 11775)
|
||||
(11503 11505)
|
||||
(8421 8432)
|
||||
(8418 8420)
|
||||
(8417 8417)
|
||||
(8413 8416)
|
||||
(8400 8412)
|
||||
(7675 7679)
|
||||
(7616 7673)
|
||||
(7019 7027)
|
||||
(6847 6848)
|
||||
(6846 6846)
|
||||
(6832 6845)
|
||||
(6783 6783)
|
||||
(4957 4959)
|
||||
(3328 3329)
|
||||
(3076 3076)
|
||||
(3072 3072)
|
||||
(2027 2035)
|
||||
(1160 1161)
|
||||
(1155 1159)
|
||||
(768 879))
|
||||
(neutral
|
||||
(917536 917631)
|
||||
(917505 917505)
|
||||
|
@ -2220,284 +2498,7 @@
|
|||
(160 160)
|
||||
(128 159)
|
||||
(127 127)
|
||||
(0 31))
|
||||
(doublewidth
|
||||
(201547 262141)
|
||||
(196608 201546)
|
||||
(195104 196605)
|
||||
(195102 195103)
|
||||
(194560 195101)
|
||||
(191457 194559)
|
||||
(183984 191456)
|
||||
(183970 183983)
|
||||
(178208 183969)
|
||||
(178206 178207)
|
||||
(177984 178205)
|
||||
(177973 177983)
|
||||
(173824 177972)
|
||||
(173790 173823)
|
||||
(131072 173789)
|
||||
(129744 129750)
|
||||
(129728 129730)
|
||||
(129712 129718)
|
||||
(129680 129704)
|
||||
(129664 129670)
|
||||
(129656 129658)
|
||||
(129648 129652)
|
||||
(129485 129535)
|
||||
(129402 129483)
|
||||
(129351 129400)
|
||||
(129340 129349)
|
||||
(129292 129338)
|
||||
(128992 129003)
|
||||
(128756 128764)
|
||||
(128747 128748)
|
||||
(128725 128727)
|
||||
(128720 128722)
|
||||
(128716 128716)
|
||||
(128640 128709)
|
||||
(128512 128591)
|
||||
(128507 128511)
|
||||
(128420 128420)
|
||||
(128405 128406)
|
||||
(128378 128378)
|
||||
(128336 128359)
|
||||
(128331 128334)
|
||||
(128255 128317)
|
||||
(128066 128252)
|
||||
(128064 128064)
|
||||
(128000 128062)
|
||||
(127995 127999)
|
||||
(127992 127994)
|
||||
(127988 127988)
|
||||
(127968 127984)
|
||||
(127951 127955)
|
||||
(127904 127946)
|
||||
(127870 127891)
|
||||
(127799 127868)
|
||||
(127789 127797)
|
||||
(127744 127776)
|
||||
(127584 127589)
|
||||
(127568 127569)
|
||||
(127552 127560)
|
||||
(127504 127547)
|
||||
(127488 127490)
|
||||
(127377 127386)
|
||||
(127374 127374)
|
||||
(127183 127183)
|
||||
(126980 126980)
|
||||
(110960 111355)
|
||||
(110948 110951)
|
||||
(110928 110930)
|
||||
(110848 110878)
|
||||
(110592 110847)
|
||||
(101632 101640)
|
||||
(101120 101589)
|
||||
(100352 101119)
|
||||
(94208 100343)
|
||||
(94192 94193)
|
||||
(94180 94180)
|
||||
(94179 94179)
|
||||
(94178 94178)
|
||||
(94176 94177)
|
||||
(65509 65510)
|
||||
(65508 65508)
|
||||
(65507 65507)
|
||||
(65506 65506)
|
||||
(65504 65505)
|
||||
(65376 65376)
|
||||
(65375 65375)
|
||||
(65374 65374)
|
||||
(65373 65373)
|
||||
(65372 65372)
|
||||
(65371 65371)
|
||||
(65345 65370)
|
||||
(65344 65344)
|
||||
(65343 65343)
|
||||
(65342 65342)
|
||||
(65341 65341)
|
||||
(65340 65340)
|
||||
(65339 65339)
|
||||
(65313 65338)
|
||||
(65311 65312)
|
||||
(65308 65310)
|
||||
(65306 65307)
|
||||
(65296 65305)
|
||||
(65294 65295)
|
||||
(65293 65293)
|
||||
(65292 65292)
|
||||
(65291 65291)
|
||||
(65290 65290)
|
||||
(65289 65289)
|
||||
(65288 65288)
|
||||
(65285 65287)
|
||||
(65284 65284)
|
||||
(65281 65283)
|
||||
(65130 65131)
|
||||
(65129 65129)
|
||||
(65128 65128)
|
||||
(65124 65126)
|
||||
(65123 65123)
|
||||
(65122 65122)
|
||||
(65119 65121)
|
||||
(65118 65118)
|
||||
(65117 65117)
|
||||
(65116 65116)
|
||||
(65115 65115)
|
||||
(65114 65114)
|
||||
(65113 65113)
|
||||
(65112 65112)
|
||||
(65108 65111)
|
||||
(65104 65106)
|
||||
(65101 65103)
|
||||
(65097 65100)
|
||||
(65096 65096)
|
||||
(65095 65095)
|
||||
(65093 65094)
|
||||
(65092 65092)
|
||||
(65091 65091)
|
||||
(65090 65090)
|
||||
(65089 65089)
|
||||
(65088 65088)
|
||||
(65087 65087)
|
||||
(65086 65086)
|
||||
(65085 65085)
|
||||
(65084 65084)
|
||||
(65083 65083)
|
||||
(65082 65082)
|
||||
(65081 65081)
|
||||
(65080 65080)
|
||||
(65079 65079)
|
||||
(65078 65078)
|
||||
(65077 65077)
|
||||
(65075 65076)
|
||||
(65073 65074)
|
||||
(65072 65072)
|
||||
(65049 65049)
|
||||
(65048 65048)
|
||||
(65047 65047)
|
||||
(65040 65046)
|
||||
(64218 64255)
|
||||
(64112 64217)
|
||||
(64110 64111)
|
||||
(63744 64109)
|
||||
(44032 55203)
|
||||
(43360 43388)
|
||||
(42128 42182)
|
||||
(40982 42124)
|
||||
(40981 40981)
|
||||
(40960 40980)
|
||||
(40957 40959)
|
||||
(19968 40956)
|
||||
(13312 19903)
|
||||
(13056 13311)
|
||||
(12992 13055)
|
||||
(12977 12991)
|
||||
(12938 12976)
|
||||
(12928 12937)
|
||||
(12896 12927)
|
||||
(12881 12895)
|
||||
(12880 12880)
|
||||
(12842 12871)
|
||||
(12832 12841)
|
||||
(12800 12830)
|
||||
(12784 12799)
|
||||
(12736 12771)
|
||||
(12704 12735)
|
||||
(12694 12703)
|
||||
(12690 12693)
|
||||
(12688 12689)
|
||||
(12593 12686)
|
||||
(12549 12591)
|
||||
(12543 12543)
|
||||
(12540 12542)
|
||||
(12539 12539)
|
||||
(12449 12538)
|
||||
(12448 12448)
|
||||
(12447 12447)
|
||||
(12445 12446)
|
||||
(12443 12444)
|
||||
(12353 12438)
|
||||
(12350 12350)
|
||||
(12349 12349)
|
||||
(12348 12348)
|
||||
(12347 12347)
|
||||
(12344 12346)
|
||||
(12342 12343)
|
||||
(12337 12341)
|
||||
(12336 12336)
|
||||
(12334 12335)
|
||||
(12330 12333)
|
||||
(12321 12329)
|
||||
(12320 12320)
|
||||
(12318 12319)
|
||||
(12317 12317)
|
||||
(12316 12316)
|
||||
(12315 12315)
|
||||
(12314 12314)
|
||||
(12313 12313)
|
||||
(12312 12312)
|
||||
(12311 12311)
|
||||
(12310 12310)
|
||||
(12309 12309)
|
||||
(12308 12308)
|
||||
(12306 12307)
|
||||
(12305 12305)
|
||||
(12304 12304)
|
||||
(12303 12303)
|
||||
(12302 12302)
|
||||
(12301 12301)
|
||||
(12300 12300)
|
||||
(12299 12299)
|
||||
(12298 12298)
|
||||
(12297 12297)
|
||||
(12296 12296)
|
||||
(12295 12295)
|
||||
(12294 12294)
|
||||
(12293 12293)
|
||||
(12292 12292)
|
||||
(12289 12291)
|
||||
(12288 12288)
|
||||
(12272 12283)
|
||||
(12032 12245)
|
||||
(11931 12019)
|
||||
(11904 11929)
|
||||
(11093 11093)
|
||||
(11088 11088)
|
||||
(11035 11036)
|
||||
(10175 10175)
|
||||
(10160 10160)
|
||||
(10133 10135)
|
||||
(10071 10071)
|
||||
(10067 10069)
|
||||
(10062 10062)
|
||||
(10060 10060)
|
||||
(10024 10024)
|
||||
(9994 9995)
|
||||
(9989 9989)
|
||||
(9981 9981)
|
||||
(9978 9978)
|
||||
(9973 9973)
|
||||
(9970 9971)
|
||||
(9962 9962)
|
||||
(9940 9940)
|
||||
(9934 9934)
|
||||
(9924 9925)
|
||||
(9917 9918)
|
||||
(9898 9899)
|
||||
(9889 9889)
|
||||
(9875 9875)
|
||||
(9855 9855)
|
||||
(9800 9811)
|
||||
(9748 9749)
|
||||
(9725 9726)
|
||||
(9203 9203)
|
||||
(9200 9200)
|
||||
(9193 9196)
|
||||
(9002 9002)
|
||||
(9001 9001)
|
||||
(8986 8987)
|
||||
(4352 4447)))))
|
||||
(0 31)))))
|
||||
|
||||
(define char-set:eastasian-combining (char-set))
|
||||
(define char-set:eastasian-doublewidth
|
||||
|
@ -2507,6 +2508,15 @@
|
|||
(define char-set:eastasian-neutral (char-set))
|
||||
(define char-set:eastasian-ambiguous (char-set))
|
||||
|
||||
(define eastasian-charsets
|
||||
(list (list 'combining char-set:eastasian-combining)
|
||||
(list 'doublewidth
|
||||
char-set:eastasian-doublewidth)
|
||||
(list 'halfwidth char-set:eastasian-halfwidth)
|
||||
(list 'narrow char-set:eastasian-narrow)
|
||||
(list 'neutral char-set:eastasian-neutral)
|
||||
(list 'ambiguous char-set:eastasian-ambiguous)))
|
||||
|
||||
(ranges->charset!
|
||||
eastasian-ht
|
||||
'combining
|
|
@ -1,4 +1,4 @@
|
|||
(define-module (runewidth eastasian locale)
|
||||
(define-module (uniseg eastasian locale)
|
||||
#:use-module (ice-9 regex)
|
||||
#:export (eastasian-locale?))
|
||||
|
||||
|
@ -21,7 +21,7 @@
|
|||
"gb2312"))
|
||||
|
||||
;; algorithm from:
|
||||
;; https://github.com/mattn/go-runewidth/blob/master/runewidth_posix.go
|
||||
;; https://github.com/mattn/go-runewidth/blob/master/runewidth_posix.go
|
||||
|
||||
;; For extracting the charset part of the locale string (some locales require this)
|
||||
;; Note regex and capture group different as guile does not support 'non-capturing group' syntax
|
|
@ -1,11 +1,11 @@
|
|||
;; Code generated by scripts/generate-emoji. DO NOT EDIT
|
||||
|
||||
(define-module
|
||||
(runewidth emoji)
|
||||
(uniseg emoji)
|
||||
#:use-module
|
||||
(srfi srfi-1)
|
||||
#:export
|
||||
(char-set:emoji))
|
||||
(char-set:extended-pictographic))
|
||||
(define emoji-list
|
||||
'((130048 131069)
|
||||
(129751 129791)
|
||||
|
@ -497,7 +497,8 @@
|
|||
(8265 8265)
|
||||
(8252 8252)))
|
||||
|
||||
(define char-set:emoji (char-set))
|
||||
(define char-set:extended-pictographic
|
||||
(char-set))
|
||||
|
||||
(for-each
|
||||
(λ (pair)
|
||||
|
@ -505,5 +506,5 @@
|
|||
(first pair)
|
||||
(+ 1 (second pair))
|
||||
#t
|
||||
char-set:emoji))
|
||||
char-set:extended-pictographic))
|
||||
emoji-list)
|
File diff suppressed because it is too large
Load diff
22
uniseg/graphemes/stream.scm
Normal file
22
uniseg/graphemes/stream.scm
Normal file
|
@ -0,0 +1,22 @@
|
|||
;; Grapheme record type and a stream of graphemes read from a port.
;;
;; The module name must mirror the file's location
;; (uniseg/graphemes/stream.scm) so that Guile can resolve
;; (use-modules (uniseg graphemes stream)); the former name
;; (runewidth graphemes stream) was left over from before the rename.
(define-module (uniseg graphemes stream)
  #:use-module (srfi srfi-41)
  #:use-module (srfi srfi-9 gnu)
  #:export (make-grapheme
            input->grapheme-stream))
|
||||
|
||||
;; Immutable record describing one grapheme.
;;
;; Constructor: (make-grapheme glyphs width sentence-end? word-end?)
;; Predicate:   grapheme?
;; Accessors:   grapheme-glyphs, grapheme-width,
;;              grapheme-sentence-end?, grapheme-word-end?
;;
;; NOTE(review): the field meanings are inferred from their names only;
;; nothing in this file populates them yet -- confirm intended types.
;; NOTE(review): this module's #:export list contains make-grapheme but
;; not grapheme? or the accessors -- confirm whether that is intended.
(define-immutable-record-type <grapheme>
|
||||
(make-grapheme glyphs width sentence-end? word-end?)
|
||||
grapheme?
|
||||
(glyphs grapheme-glyphs)
|
||||
(width grapheme-width)
|
||||
(sentence-end? grapheme-sentence-end?)
|
||||
(word-end? grapheme-word-end?))
|
||||
|
||||
;; Stream-producing procedure over PORT: conses a value onto a
;; recursive call of itself on the same PORT.  When PORT is #f it is
;; replaced by (current-input-port).
;;
;; NOTE(review): this looks like unfinished scaffolding.  As written:
;;  - glyphs, width, sentence-end? and word-end? are passed to
;;    make-grapheme but do not appear to be bound anywhere in this
;;    module;
;;  - c, the stream head, likewise appears unbound, and the record
;;    bound to gr is never used;
;;  - nothing is read from PORT and there is no end-of-input case, so
;;    the recursion has no visible termination condition.
;; Presumably the intent is to read characters from PORT, group them
;; into <grapheme> records and yield those -- TODO confirm and implement.
(define-stream (input->grapheme-stream port)
|
||||
(unless port
|
||||
(set! port (current-input-port)))
|
||||
|
||||
(define gr (make-grapheme glyphs width sentence-end? word-end?))
|
||||
|
||||
(stream-cons c (input->grapheme-stream port)))
|
|
@ -1,5 +1,5 @@
|
|||
(define-module
|
||||
(runewidth hconfig)
|
||||
(uniseg hconfig)
|
||||
#:use-module
|
||||
(srfi srfi-26)
|
||||
#:export
|
||||
|
@ -21,7 +21,7 @@
|
|||
|
||||
(define %copyright '(2024))
|
||||
|
||||
(define %gettext-domain "guile-runewidth")
|
||||
(define %gettext-domain "guile-uniseg")
|
||||
|
||||
(define G_ identity)
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
(define-module (runewidth internal)
|
||||
(define-module (uniseg internal)
|
||||
#:use-module (ice-9 peg)
|
||||
#:use-module (ice-9 textual-ports)
|
||||
#:use-module (ice-9 exceptions)
|
Loading…
Reference in a new issue