Add emoji and actually get east asian generator working
This commit is contained in:
parent
10c8421f29
commit
470b20a74c
|
@ -66,3 +66,5 @@ stamp-h[0-9]
|
|||
tmp
|
||||
/.version
|
||||
/doc/stamp-[0-9]
|
||||
scripts/*
|
||||
!scripts/*.in
|
57
guix.scm
57
guix.scm
|
@ -26,7 +26,62 @@
|
|||
(string-contains file my-string))
|
||||
(list ".git" ".dir-locals.el" "guix.scm"))))))
|
||||
(build-system gnu-build-system)
|
||||
(arguments `())
|
||||
(arguments
|
||||
`(#:modules
|
||||
((ice-9 match)
|
||||
(ice-9 ftw)
|
||||
,@%gnu-build-system-modules)
|
||||
#:phases
|
||||
(modify-phases
|
||||
%standard-phases
|
||||
(add-after
|
||||
'install
|
||||
'hall-wrap-binaries
|
||||
(lambda* (#:key inputs outputs #:allow-other-keys)
|
||||
(let* ((compiled-dir
|
||||
(lambda (out version)
|
||||
(string-append
|
||||
out
|
||||
"/lib/guile/"
|
||||
version
|
||||
"/site-ccache")))
|
||||
(uncompiled-dir
|
||||
(lambda (out version)
|
||||
(string-append
|
||||
out
|
||||
"/share/guile/site"
|
||||
(if (string-null? version) "" "/")
|
||||
version)))
|
||||
(dep-path
|
||||
(lambda (env modules path)
|
||||
(list env
|
||||
":"
|
||||
'prefix
|
||||
(cons modules
|
||||
(map (lambda (input)
|
||||
(string-append
|
||||
(assoc-ref inputs input)
|
||||
path))
|
||||
,''())))))
|
||||
(out (assoc-ref outputs "out"))
|
||||
(bin (string-append out "/bin/"))
|
||||
(site (uncompiled-dir out "")))
|
||||
(match (scandir site)
|
||||
(("." ".." version)
|
||||
(for-each
|
||||
(lambda (file)
|
||||
(wrap-program
|
||||
(string-append bin file)
|
||||
(dep-path
|
||||
"GUILE_LOAD_PATH"
|
||||
(uncompiled-dir out version)
|
||||
(uncompiled-dir "" version))
|
||||
(dep-path
|
||||
"GUILE_LOAD_COMPILED_PATH"
|
||||
(compiled-dir out version)
|
||||
(compiled-dir "" version))))
|
||||
,''("generate-east-asian" "generate-emoji"))
|
||||
#t))))))))
|
||||
(native-inputs
|
||||
(list autoconf automake pkg-config texinfo))
|
||||
(inputs (list guile-3.0))
|
||||
|
|
14
hall.scm
14
hall.scm
|
@ -19,10 +19,18 @@
|
|||
(native-language-support #f)
|
||||
(licensing #f)))
|
||||
(files (libraries
|
||||
((directory "runewidth" ())
|
||||
((directory
|
||||
"runewidth"
|
||||
((scheme-file "emoji")
|
||||
(scheme-file "eastasian")
|
||||
(scheme-file "internal")))
|
||||
(scheme-file "runewidth")))
|
||||
(tests ((directory "tests" ((scheme-file "test-posix")))))
|
||||
(programs ())
|
||||
(programs
|
||||
((directory
|
||||
"scripts"
|
||||
((in-file "generate-east-asian")
|
||||
(in-file "generate-emoji")))))
|
||||
(documentation
|
||||
((org-file "README")
|
||||
(symlink "README" "README.org")
|
||||
|
@ -33,4 +41,4 @@
|
|||
((scheme-file "guix")
|
||||
(text-file ".gitignore")
|
||||
(scheme-file "hall")
|
||||
(directory "tests" ())))))
|
||||
(directory "tests" ((scheme-file "test-posix")))))))
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,509 @@
|
|||
;; Code generated by script/generate. DO NOT EDIT
|
||||
|
||||
(define-module
|
||||
(runewidth emoji)
|
||||
#:use-module
|
||||
(srfi srfi-1)
|
||||
#:export
|
||||
(char-set:emoji))
|
||||
(define emoji-list
|
||||
'((130048 131069)
|
||||
(129751 129791)
|
||||
(129744 129750)
|
||||
(129731 129743)
|
||||
(129728 129730)
|
||||
(129719 129727)
|
||||
(129712 129718)
|
||||
(129705 129711)
|
||||
(129686 129704)
|
||||
(129680 129685)
|
||||
(129671 129679)
|
||||
(129667 129670)
|
||||
(129664 129666)
|
||||
(129659 129663)
|
||||
(129656 129658)
|
||||
(129653 129655)
|
||||
(129652 129652)
|
||||
(129648 129651)
|
||||
(129536 129647)
|
||||
(129511 129535)
|
||||
(129488 129510)
|
||||
(129485 129487)
|
||||
(129484 129484)
|
||||
(129483 129483)
|
||||
(129475 129482)
|
||||
(129473 129474)
|
||||
(129472 129472)
|
||||
(129466 129471)
|
||||
(129456 129465)
|
||||
(129454 129455)
|
||||
(129451 129453)
|
||||
(129445 129450)
|
||||
(129443 129444)
|
||||
(129432 129442)
|
||||
(129426 129431)
|
||||
(129413 129425)
|
||||
(129408 129412)
|
||||
(129404 129407)
|
||||
(129403 129403)
|
||||
(129402 129402)
|
||||
(129401 129401)
|
||||
(129399 129400)
|
||||
(129395 129398)
|
||||
(129394 129394)
|
||||
(129393 129393)
|
||||
(129388 129392)
|
||||
(129375 129387)
|
||||
(129360 129374)
|
||||
(129357 129359)
|
||||
(129356 129356)
|
||||
(129351 129355)
|
||||
(129344 129349)
|
||||
(129343 129343)
|
||||
(129340 129342)
|
||||
(129331 129338)
|
||||
(129329 129330)
|
||||
(129328 129328)
|
||||
(129320 129327)
|
||||
(129312 129319)
|
||||
(129311 129311)
|
||||
(129305 129310)
|
||||
(129296 129304)
|
||||
(129293 129295)
|
||||
(129292 129292)
|
||||
(129198 129279)
|
||||
(129160 129167)
|
||||
(129114 129119)
|
||||
(129096 129103)
|
||||
(129036 129039)
|
||||
(129004 129023)
|
||||
(128992 129003)
|
||||
(128981 128991)
|
||||
(128884 128895)
|
||||
(128765 128767)
|
||||
(128763 128764)
|
||||
(128762 128762)
|
||||
(128761 128761)
|
||||
(128759 128760)
|
||||
(128756 128758)
|
||||
(128755 128755)
|
||||
(128753 128754)
|
||||
(128752 128752)
|
||||
(128749 128751)
|
||||
(128747 128748)
|
||||
(128746 128746)
|
||||
(128745 128745)
|
||||
(128742 128744)
|
||||
(128736 128741)
|
||||
(128728 128735)
|
||||
(128726 128727)
|
||||
(128725 128725)
|
||||
(128723 128724)
|
||||
(128721 128722)
|
||||
(128720 128720)
|
||||
(128717 128719)
|
||||
(128716 128716)
|
||||
(128715 128715)
|
||||
(128710 128714)
|
||||
(128705 128709)
|
||||
(128704 128704)
|
||||
(128703 128703)
|
||||
(128697 128702)
|
||||
(128695 128696)
|
||||
(128694 128694)
|
||||
(128691 128693)
|
||||
(128690 128690)
|
||||
(128686 128689)
|
||||
(128679 128685)
|
||||
(128678 128678)
|
||||
(128676 128677)
|
||||
(128675 128675)
|
||||
(128674 128674)
|
||||
(128667 128673)
|
||||
(128665 128666)
|
||||
(128664 128664)
|
||||
(128663 128663)
|
||||
(128662 128662)
|
||||
(128661 128661)
|
||||
(128660 128660)
|
||||
(128657 128659)
|
||||
(128656 128656)
|
||||
(128655 128655)
|
||||
(128654 128654)
|
||||
(128653 128653)
|
||||
(128652 128652)
|
||||
(128650 128651)
|
||||
(128649 128649)
|
||||
(128648 128648)
|
||||
(128647 128647)
|
||||
(128646 128646)
|
||||
(128643 128645)
|
||||
(128641 128642)
|
||||
(128640 128640)
|
||||
(128581 128591)
|
||||
(128577 128580)
|
||||
(128567 128576)
|
||||
(128566 128566)
|
||||
(128565 128565)
|
||||
(128564 128564)
|
||||
(128560 128563)
|
||||
(128558 128559)
|
||||
(128557 128557)
|
||||
(128556 128556)
|
||||
(128552 128555)
|
||||
(128550 128551)
|
||||
(128544 128549)
|
||||
(128543 128543)
|
||||
(128540 128542)
|
||||
(128539 128539)
|
||||
(128538 128538)
|
||||
(128537 128537)
|
||||
(128536 128536)
|
||||
(128535 128535)
|
||||
(128534 128534)
|
||||
(128533 128533)
|
||||
(128530 128532)
|
||||
(128529 128529)
|
||||
(128528 128528)
|
||||
(128527 128527)
|
||||
(128526 128526)
|
||||
(128521 128525)
|
||||
(128519 128520)
|
||||
(128513 128518)
|
||||
(128512 128512)
|
||||
(128507 128511)
|
||||
(128506 128506)
|
||||
(128500 128505)
|
||||
(128499 128499)
|
||||
(128496 128498)
|
||||
(128495 128495)
|
||||
(128489 128494)
|
||||
(128488 128488)
|
||||
(128484 128487)
|
||||
(128483 128483)
|
||||
(128482 128482)
|
||||
(128481 128481)
|
||||
(128479 128480)
|
||||
(128476 128478)
|
||||
(128468 128475)
|
||||
(128465 128467)
|
||||
(128453 128464)
|
||||
(128450 128452)
|
||||
(128445 128449)
|
||||
(128444 128444)
|
||||
(128435 128443)
|
||||
(128433 128434)
|
||||
(128425 128432)
|
||||
(128424 128424)
|
||||
(128422 128423)
|
||||
(128421 128421)
|
||||
(128420 128420)
|
||||
(128407 128419)
|
||||
(128405 128406)
|
||||
(128401 128404)
|
||||
(128400 128400)
|
||||
(128398 128399)
|
||||
(128394 128397)
|
||||
(128392 128393)
|
||||
(128391 128391)
|
||||
(128379 128390)
|
||||
(128378 128378)
|
||||
(128371 128377)
|
||||
(128369 128370)
|
||||
(128367 128368)
|
||||
(128360 128366)
|
||||
(128348 128359)
|
||||
(128336 128347)
|
||||
(128335 128335)
|
||||
(128331 128334)
|
||||
(128329 128330)
|
||||
(128326 128328)
|
||||
(128302 128317)
|
||||
(128300 128301)
|
||||
(128278 128299)
|
||||
(128277 128277)
|
||||
(128266 128276)
|
||||
(128265 128265)
|
||||
(128264 128264)
|
||||
(128260 128263)
|
||||
(128259 128259)
|
||||
(128255 128258)
|
||||
(128254 128254)
|
||||
(128253 128253)
|
||||
(128249 128252)
|
||||
(128248 128248)
|
||||
(128246 128247)
|
||||
(128245 128245)
|
||||
(128240 128244)
|
||||
(128239 128239)
|
||||
(128238 128238)
|
||||
(128236 128237)
|
||||
(128184 128235)
|
||||
(128182 128183)
|
||||
(128174 128181)
|
||||
(128173 128173)
|
||||
(128110 128172)
|
||||
(128108 128109)
|
||||
(128102 128107)
|
||||
(128101 128101)
|
||||
(128066 128100)
|
||||
(128065 128065)
|
||||
(128064 128064)
|
||||
(128063 128063)
|
||||
(128043 128062)
|
||||
(128042 128042)
|
||||
(128023 128041)
|
||||
(128022 128022)
|
||||
(128021 128021)
|
||||
(128020 128020)
|
||||
(128019 128019)
|
||||
(128017 128018)
|
||||
(128015 128016)
|
||||
(128012 128014)
|
||||
(128009 128011)
|
||||
(128008 128008)
|
||||
(128000 128007)
|
||||
(127992 127994)
|
||||
(127991 127991)
|
||||
(127990 127990)
|
||||
(127989 127989)
|
||||
(127988 127988)
|
||||
(127987 127987)
|
||||
(127985 127986)
|
||||
(127973 127984)
|
||||
(127972 127972)
|
||||
(127968 127971)
|
||||
(127956 127967)
|
||||
(127951 127955)
|
||||
(127947 127950)
|
||||
(127946 127946)
|
||||
(127945 127945)
|
||||
(127944 127944)
|
||||
(127943 127943)
|
||||
(127942 127942)
|
||||
(127941 127941)
|
||||
(127904 127940)
|
||||
(127902 127903)
|
||||
(127900 127901)
|
||||
(127897 127899)
|
||||
(127896 127896)
|
||||
(127894 127895)
|
||||
(127892 127893)
|
||||
(127872 127891)
|
||||
(127870 127871)
|
||||
(127869 127869)
|
||||
(127868 127868)
|
||||
(127825 127867)
|
||||
(127824 127824)
|
||||
(127820 127823)
|
||||
(127819 127819)
|
||||
(127799 127818)
|
||||
(127798 127798)
|
||||
(127796 127797)
|
||||
(127794 127795)
|
||||
(127792 127793)
|
||||
(127789 127791)
|
||||
(127780 127788)
|
||||
(127778 127779)
|
||||
(127777 127777)
|
||||
(127775 127776)
|
||||
(127773 127774)
|
||||
(127772 127772)
|
||||
(127771 127771)
|
||||
(127770 127770)
|
||||
(127769 127769)
|
||||
(127766 127768)
|
||||
(127763 127765)
|
||||
(127762 127762)
|
||||
(127761 127761)
|
||||
(127760 127760)
|
||||
(127759 127759)
|
||||
(127757 127758)
|
||||
(127744 127756)
|
||||
(127570 127743)
|
||||
(127568 127569)
|
||||
(127561 127567)
|
||||
(127548 127551)
|
||||
(127538 127546)
|
||||
(127535 127535)
|
||||
(127514 127514)
|
||||
(127491 127503)
|
||||
(127489 127490)
|
||||
(127405 127461)
|
||||
(127377 127386)
|
||||
(127374 127374)
|
||||
(127358 127359)
|
||||
(127344 127345)
|
||||
(127340 127343)
|
||||
(127279 127279)
|
||||
(127245 127247)
|
||||
(127184 127231)
|
||||
(127183 127183)
|
||||
(126981 127182)
|
||||
(126980 126980)
|
||||
(126976 126979)
|
||||
(12953 12953)
|
||||
(12951 12951)
|
||||
(12349 12349)
|
||||
(12336 12336)
|
||||
(11093 11093)
|
||||
(11088 11088)
|
||||
(11035 11036)
|
||||
(11013 11015)
|
||||
(10548 10549)
|
||||
(10175 10175)
|
||||
(10160 10160)
|
||||
(10145 10145)
|
||||
(10133 10135)
|
||||
(10085 10087)
|
||||
(10084 10084)
|
||||
(10083 10083)
|
||||
(10071 10071)
|
||||
(10067 10069)
|
||||
(10062 10062)
|
||||
(10060 10060)
|
||||
(10055 10055)
|
||||
(10052 10052)
|
||||
(10035 10036)
|
||||
(10024 10024)
|
||||
(10017 10017)
|
||||
(10013 10013)
|
||||
(10006 10006)
|
||||
(10004 10004)
|
||||
(10002 10002)
|
||||
(10000 10001)
|
||||
(9999 9999)
|
||||
(9998 9998)
|
||||
(9997 9997)
|
||||
(9992 9996)
|
||||
(9989 9989)
|
||||
(9987 9988)
|
||||
(9986 9986)
|
||||
(9982 9985)
|
||||
(9981 9981)
|
||||
(9979 9980)
|
||||
(9978 9978)
|
||||
(9975 9977)
|
||||
(9974 9974)
|
||||
(9973 9973)
|
||||
(9972 9972)
|
||||
(9970 9971)
|
||||
(9968 9969)
|
||||
(9963 9967)
|
||||
(9962 9962)
|
||||
(9961 9961)
|
||||
(9941 9960)
|
||||
(9940 9940)
|
||||
(9939 9939)
|
||||
(9938 9938)
|
||||
(9937 9937)
|
||||
(9936 9936)
|
||||
(9935 9935)
|
||||
(9934 9934)
|
||||
(9929 9933)
|
||||
(9928 9928)
|
||||
(9926 9927)
|
||||
(9924 9925)
|
||||
(9919 9923)
|
||||
(9917 9918)
|
||||
(9906 9916)
|
||||
(9904 9905)
|
||||
(9900 9903)
|
||||
(9898 9899)
|
||||
(9896 9897)
|
||||
(9895 9895)
|
||||
(9890 9894)
|
||||
(9888 9889)
|
||||
(9885 9887)
|
||||
(9883 9884)
|
||||
(9882 9882)
|
||||
(9881 9881)
|
||||
(9880 9880)
|
||||
(9878 9879)
|
||||
(9877 9877)
|
||||
(9876 9876)
|
||||
(9875 9875)
|
||||
(9874 9874)
|
||||
(9872 9873)
|
||||
(9856 9861)
|
||||
(9855 9855)
|
||||
(9854 9854)
|
||||
(9852 9853)
|
||||
(9851 9851)
|
||||
(9833 9850)
|
||||
(9832 9832)
|
||||
(9831 9831)
|
||||
(9829 9830)
|
||||
(9828 9828)
|
||||
(9827 9827)
|
||||
(9825 9826)
|
||||
(9824 9824)
|
||||
(9823 9823)
|
||||
(9812 9822)
|
||||
(9800 9811)
|
||||
(9795 9799)
|
||||
(9794 9794)
|
||||
(9793 9793)
|
||||
(9792 9792)
|
||||
(9787 9791)
|
||||
(9786 9786)
|
||||
(9784 9785)
|
||||
(9776 9783)
|
||||
(9775 9775)
|
||||
(9774 9774)
|
||||
(9771 9773)
|
||||
(9770 9770)
|
||||
(9767 9769)
|
||||
(9766 9766)
|
||||
(9764 9765)
|
||||
(9762 9763)
|
||||
(9761 9761)
|
||||
(9760 9760)
|
||||
(9758 9759)
|
||||
(9757 9757)
|
||||
(9753 9756)
|
||||
(9752 9752)
|
||||
(9750 9751)
|
||||
(9748 9749)
|
||||
(9746 9746)
|
||||
(9745 9745)
|
||||
(9743 9744)
|
||||
(9742 9742)
|
||||
(9735 9741)
|
||||
(9733 9733)
|
||||
(9732 9732)
|
||||
(9730 9731)
|
||||
(9728 9729)
|
||||
(9723 9726)
|
||||
(9664 9664)
|
||||
(9654 9654)
|
||||
(9642 9643)
|
||||
(9410 9410)
|
||||
(9208 9210)
|
||||
(9203 9203)
|
||||
(9201 9202)
|
||||
(9200 9200)
|
||||
(9199 9199)
|
||||
(9197 9198)
|
||||
(9193 9196)
|
||||
(9167 9167)
|
||||
(9096 9096)
|
||||
(9000 9000)
|
||||
(8986 8987)
|
||||
(8617 8618)
|
||||
(8596 8601)
|
||||
(8505 8505)
|
||||
(8482 8482)
|
||||
(8265 8265)
|
||||
(8252 8252)))
|
||||
|
||||
(define char-set:emoji (char-set))
|
||||
|
||||
(for-each
|
||||
(λ (pair)
|
||||
(ucs-range->char-set!
|
||||
(first pair)
|
||||
(+ 1 (second pair))
|
||||
#t
|
||||
char-set:emoji))
|
||||
emoji-list)
|
|
@ -0,0 +1,67 @@
|
|||
(define-module (runewidth internal)
|
||||
#:use-module (ice-9 peg)
|
||||
#:use-module (ice-9 textual-ports)
|
||||
#:use-module (ice-9 exceptions)
|
||||
#:use-module (ice-9 i18n)
|
||||
#:use-module (web uri)
|
||||
#:use-module (web client)
|
||||
#:use-module (web request)
|
||||
#:use-module (srfi srfi-71)
|
||||
#:export (@hex
|
||||
@codepoint
|
||||
@codepoint-range
|
||||
@comment
|
||||
@ws
|
||||
hex-string->integer
|
||||
format-exception-msg
|
||||
in-surrogate-range
|
||||
wget-to-lines
|
||||
file-to-lines))
|
||||
|
||||
(define-peg-pattern @hex body (peg "[a-fA-F0-9]"))
|
||||
|
||||
(define-peg-pattern @codepoint all
|
||||
(* @hex))
|
||||
|
||||
(define-peg-pattern @codepoint-range all
|
||||
(or
|
||||
(and @codepoint (ignore "..") @codepoint)
|
||||
@codepoint))
|
||||
|
||||
|
||||
(define-peg-pattern @comment all
|
||||
(and (ignore "#") (* peg-any)))
|
||||
|
||||
(define-peg-pattern @ws none
|
||||
(or " " "\t"))
|
||||
|
||||
(define (hex-string->integer str)
|
||||
;; XXX: We would ideally do integer->char here and save it to file as such
|
||||
;; However read-expr* does not actually work for all the characters!
|
||||
;; So they can't be written out as such.
|
||||
(locale-string->integer str 16))
|
||||
|
||||
(define (format-exception-msg port err)
|
||||
(apply format port (exception-message err) (exception-irritants err))
|
||||
(display "\n" port))
|
||||
|
||||
(define (in-surrogate-range num)
|
||||
(and (>= num #xd800)
|
||||
(<= num #xdfff)))
|
||||
|
||||
|
||||
(define* (wget-to-lines url #:optional (port #f))
|
||||
(format port "Downloading from ~a..." url)
|
||||
|
||||
(let* ((response body (http-get (string->uri url)))
|
||||
(lines (string-split body #\newline)))
|
||||
(format port " done.\n")
|
||||
lines))
|
||||
|
||||
(define* (file-to-lines path #:optional (port #f))
|
||||
(format port "Loading from local file ~a\n" path)
|
||||
(string-split
|
||||
(with-input-from-file path
|
||||
(λ () (get-string-all (current-input-port)))) #\newline))
|
||||
|
||||
|
|
@ -0,0 +1,165 @@
|
|||
#!@GUILE@ --no-auto-compile
|
||||
-*- scheme -*-
|
||||
!#
|
||||
|
||||
;; Can be called with a trailing argument pointing to the file on disk.
|
||||
|
||||
(use-modules
|
||||
(runewidth internal)
|
||||
(ice-9 pretty-print)
|
||||
(ice-9 peg)
|
||||
(ice-9 format)
|
||||
(ice-9 exceptions)
|
||||
(ice-9 match)
|
||||
(ice-9 hash-table)
|
||||
(srfi srfi-1))
|
||||
|
||||
(define stdout (current-output-port))
|
||||
|
||||
(define east-asian-url
|
||||
"https://unicode.org/Public/13.0.0/ucd/EastAsianWidth.txt")
|
||||
|
||||
(define-peg-pattern @ea-width-prop all
|
||||
(or "A" "F" "H" "Na" "N" "W"))
|
||||
|
||||
(define-peg-pattern @ea-datum body
|
||||
(and @codepoint-range (* @ws) (ignore ";") (* @ws) @ea-width-prop))
|
||||
|
||||
(define-peg-pattern @ea-line body
|
||||
(and @ea-datum (* @ws) @comment))
|
||||
|
||||
(define ea-chars-ht (make-hash-table 6))
|
||||
|
||||
(define (process-east-asian-line line)
|
||||
(define (cons-ht! key low high)
|
||||
(let* ((old (hashq-ref ea-chars-ht key))
|
||||
(value (list low high))
|
||||
(new-lst
|
||||
(if old
|
||||
(cons value old)
|
||||
(list value))))
|
||||
(hashq-set! ea-chars-ht key new-lst)))
|
||||
|
||||
(define tree (peg:tree (match-pattern @ea-line line)))
|
||||
|
||||
(unless (or (not tree)
|
||||
(null? tree)
|
||||
(eq? '@comment (car tree)))
|
||||
|
||||
(match tree
|
||||
(((('@codepoint-range
|
||||
('@codepoint codepoints) ...)
|
||||
('@ea-width-prop width-prop))
|
||||
('@comment comment))
|
||||
|
||||
(with-exception-handler
|
||||
(λ (err)
|
||||
(format stdout "Skipping line due to error :: ")
|
||||
(format-exception-msg stdout err))
|
||||
(λ ()
|
||||
(let ((f (hex-string->integer (first codepoints)))
|
||||
(l (hex-string->integer (last codepoints))))
|
||||
|
||||
(when (or (in-surrogate-range f)
|
||||
(in-surrogate-range l))
|
||||
(error (format #f "chars in surrogate range ~x -> ~x" f l)))
|
||||
|
||||
(if (string-contains comment "COMBINING")
|
||||
(cons-ht! 'combining f l)
|
||||
(match width-prop
|
||||
((or "W" "F")
|
||||
(cons-ht! 'doublewidth f l))
|
||||
("H"
|
||||
(cons-ht! 'halfwidth f l))
|
||||
("Na"
|
||||
(cons-ht! 'narrow f l))
|
||||
("N"
|
||||
(cons-ht! 'neutral f l))
|
||||
("A"
|
||||
(cons-ht! 'ambiguous f l))))))
|
||||
#:unwind? #t)))))
|
||||
|
||||
(define ea-sets
|
||||
'(combining
|
||||
doublewidth
|
||||
halfwidth
|
||||
narrow
|
||||
neutral
|
||||
ambiguous))
|
||||
|
||||
(define ea-symbol-names
|
||||
(map
|
||||
(λ (set)
|
||||
(string->symbol
|
||||
(string-concatenate
|
||||
(list "char-set:eastasian-"
|
||||
(symbol->string set)))))
|
||||
ea-sets))
|
||||
|
||||
(define ea-sets-and-symbols
|
||||
(zip ea-sets ea-symbol-names))
|
||||
|
||||
|
||||
(define line-func
|
||||
(if (= 2 (length (command-line)))
|
||||
(λ ()
|
||||
(file-to-lines (last (command-line)) stdout))
|
||||
(λ ()
|
||||
(wget-to-lines east-asian-url stdout))))
|
||||
|
||||
(define file "runewidth/eastasian.scm")
|
||||
|
||||
(format stdout "Writing to ~a...\n" file)
|
||||
|
||||
(with-output-to-file file
|
||||
(λ ()
|
||||
(display ";; Code generated by script/generate. DO NOT EDIT\n\n")
|
||||
|
||||
(for-each process-east-asian-line (line-func))
|
||||
|
||||
(pretty-print
|
||||
`(define-module (runewidth eastasian)
|
||||
#:use-module (ice-9 hash-table)
|
||||
#:use-module (srfi srfi-1)
|
||||
#:export
|
||||
,ea-symbol-names))
|
||||
|
||||
(pretty-print
|
||||
`(define chars-ht
|
||||
(alist->hashq-table ',(hash-map->list cons ea-chars-ht))))
|
||||
|
||||
(display "\n")
|
||||
|
||||
(pretty-print
|
||||
`(define-syntax-rule (ranges->charset! name symbol)
|
||||
(let* ((pairs (hashq-ref chars-ht name)))
|
||||
(for-each
|
||||
(λ (pair)
|
||||
(ucs-range->char-set!
|
||||
(first pair)
|
||||
;; Exclusive upper range, so add one
|
||||
(+ (second pair) 1)
|
||||
#t symbol))
|
||||
pairs))))
|
||||
|
||||
(display "\n")
|
||||
|
||||
(for-each
|
||||
(λ (sym)
|
||||
(pretty-print
|
||||
`(define ,sym (char-set))))
|
||||
ea-symbol-names)
|
||||
|
||||
(display "\n")
|
||||
|
||||
(for-each
|
||||
(λ (set-pair)
|
||||
(let ((name (first set-pair))
|
||||
(symbol (second set-pair)))
|
||||
(pretty-print
|
||||
`(ranges->charset! ',name ,symbol))))
|
||||
ea-sets-and-symbols)
|
||||
|
||||
(display "Code generation complete.\n" stdout)))
|
||||
|
||||
(format stdout "Written to ~a.\n" file)
|
|
@ -0,0 +1,109 @@
|
|||
#!@GUILE@ --no-auto-compile
|
||||
-*- scheme -*-
|
||||
!#
|
||||
|
||||
;; Can be called with a trailing argument pointing to the file on disk.
|
||||
|
||||
(use-modules
|
||||
(runewidth internal)
|
||||
(ice-9 pretty-print)
|
||||
(ice-9 peg)
|
||||
(ice-9 format)
|
||||
(ice-9 exceptions)
|
||||
(ice-9 match)
|
||||
(srfi srfi-1))
|
||||
|
||||
(define stdout (current-output-port))
|
||||
|
||||
(define emoji-url
|
||||
"https://unicode.org/Public/13.0.0/ucd/emoji/emoji-data.txt")
|
||||
|
||||
(define-peg-pattern @emoji-category all
|
||||
(* (peg "[a-zA-Z_]")))
|
||||
|
||||
(define-peg-pattern @emoji-datum body
|
||||
(and @codepoint-range (* @ws) (ignore ";") (* @ws) @emoji-category))
|
||||
|
||||
(define-peg-pattern @emoji-line body
|
||||
(and @emoji-datum (* @ws) @comment))
|
||||
|
||||
(define emoji-list '())
|
||||
|
||||
(define (process-emoji-line line)
|
||||
(define tree (peg:tree (match-pattern @emoji-line line)))
|
||||
|
||||
(define (in-surrogate-range num)
|
||||
(and (>= num #xd800)
|
||||
(<= num #xdfff)))
|
||||
|
||||
(unless (or (not tree)
|
||||
(null? tree)
|
||||
(eq? '@comment (car tree)))
|
||||
|
||||
(match tree
|
||||
(((('@codepoint-range
|
||||
('@codepoint codepoints) ...)
|
||||
('@emoji-category category))
|
||||
('@comment comment))
|
||||
|
||||
(with-exception-handler
|
||||
(λ (err)
|
||||
(format stdout "Skipping line due to error :: ")
|
||||
(format-exception-msg stdout err))
|
||||
(λ ()
|
||||
(let ((f (hex-string->integer (first codepoints)))
|
||||
(l (hex-string->integer (last codepoints))))
|
||||
|
||||
(when (or (in-surrogate-range f)
|
||||
(in-surrogate-range l))
|
||||
(error (format #f "chars in surrogate range ~x -> ~x" f l)))
|
||||
|
||||
(when (and (equal? "Extended_Pictographic" category)
|
||||
(> l #xFF))
|
||||
(set! emoji-list (cons (list f l) emoji-list)))))
|
||||
#:unwind? #t)))))
|
||||
|
||||
(define line-func
|
||||
(if (= 2 (length (command-line)))
|
||||
(λ ()
|
||||
(file-to-lines (last (command-line)) stdout))
|
||||
(λ ()
|
||||
(wget-to-lines emoji-url stdout))))
|
||||
|
||||
(define file "runewidth/emoji.scm")
|
||||
|
||||
(format stdout "Writing to ~a...\n" file)
|
||||
|
||||
(with-output-to-file file
|
||||
(λ ()
|
||||
(display ";; Code generated by script/generate. DO NOT EDIT\n\n")
|
||||
|
||||
(for-each process-emoji-line (line-func))
|
||||
|
||||
(pretty-print
|
||||
`(define-module (runewidth emoji)
|
||||
#:use-module (srfi srfi-1)
|
||||
#:export (char-set:emoji)))
|
||||
|
||||
(pretty-print
|
||||
`(define emoji-list ',emoji-list))
|
||||
|
||||
(display "\n")
|
||||
|
||||
(pretty-print
|
||||
`(define char-set:emoji (char-set)))
|
||||
|
||||
(display "\n")
|
||||
|
||||
(pretty-print
|
||||
`(for-each
|
||||
(λ (pair)
|
||||
(ucs-range->char-set!
|
||||
(first pair)
|
||||
(+ 1 (second pair))
|
||||
#t char-set:emoji))
|
||||
emoji-list))
|
||||
|
||||
(display "Code generation complete.\n" stdout)))
|
||||
|
||||
(format stdout "Written to ~a.\n" file)
|
|
@ -1,61 +0,0 @@
|
|||
#!@GUILE@ --no-auto-compile
|
||||
-*- scheme -*-
|
||||
!#
|
||||
|
||||
(use-modules
|
||||
(ice-9 pretty-print)
|
||||
(ice-9 peg)
|
||||
(web uri)
|
||||
(web request))
|
||||
|
||||
(define stdout (current-output-port))
|
||||
|
||||
(define east-asian-url
|
||||
"https://unicode.org/Public/13.0.0/ucd/EastAsianWidth.txt")
|
||||
|
||||
(define emoji-url
|
||||
"https://unicode.org/Public/13.0.0/ucd/emoji/emoji-data.txt")
|
||||
|
||||
(define-peg-pattern @hex body (peg "[a-fA-F0-9]"))
|
||||
|
||||
(define-peg-pattern @codepoint all
|
||||
(* @hex))
|
||||
|
||||
(define-peg-pattern @ea-width-prop all
|
||||
(or "A" "F" "H" "N" "Na" "W"))
|
||||
|
||||
(define-peg-pattern @codepoint-range all
|
||||
(or
|
||||
(and @codepoint (ignore "..") @codepoint)
|
||||
@codepoint))
|
||||
|
||||
(define-peg-pattern @ea-datum body
|
||||
(and @codepoint-range (ignore ";") @ea-width-prop))
|
||||
|
||||
(define-peg-pattern @comment none
|
||||
(and "#" peg-any))
|
||||
|
||||
(define-peg-pattern @ea-line body
|
||||
(and (? @ea-datum) (? @comment)))
|
||||
|
||||
|
||||
(define (process-east-asian-line line)
|
||||
(let ((tree (peg:tree (match-pattern @ea-line line))))
|
||||
(unless (null? tree)
|
||||
(pk tree))))
|
||||
|
||||
|
||||
(define (wget-to-lines url)
|
||||
(string-split (http-get (string->uri url)) #\newline))
|
||||
|
||||
(with-output-to-file "../runewidth/table.scm"
|
||||
(λ ()
|
||||
|
||||
(display ";; Code generated by script/generate. DO NOT EDIT\n\n")
|
||||
(display "(define-module (runewidth table))\n\n")
|
||||
|
||||
(format "Downloading and processing from ~a..." east-asian-url)
|
||||
(for-each process-east-asian-line (wget-to-lines east-asian-url)))
|
||||
|
||||
(format "Downloading and processing from ~a..." emoji-url)
|
||||
(for-each process-emoji-line (wget-to-lines emoji-url))))
|
Loading…
Reference in New Issue