Holy shit it works

This commit is contained in:
Vivianne 2024-03-03 20:25:47 -05:00
parent 1692a9fc05
commit e53876be52
9 changed files with 2261 additions and 1383 deletions

View file

@ -112,8 +112,7 @@
#:use-module (ice-9 hash-table)
#:use-module (srfi srfi-1)
#:use-module (uniseg internal)
#:export
(,@ea-symbol-names
#:export (,@ea-symbol-names
eastasian-charsets)))
(pretty-print

View file

@ -27,9 +27,38 @@
(define-peg-pattern @emoji-line body
(and @emoji-datum (* @ws) @comment))
(define emoji-list '())
(define emoji-ht (make-hash-table 5))
(define emoji-sets
'(emoji
emoji-presentation
emoji-modifier
emoji-modifier-base
emoji-component
emoji-extended-pictographic))
(define emoji-symbol-names
(map
(λ (set)
(string->symbol
(string-concatenate
(list "char-set:"
(symbol->string set)))))
emoji-sets))
(define emoji-sets-and-symbols
(zip emoji-sets emoji-symbol-names))
(define (process-emoji-line line)
(define (string->category str)
(match str
("Emoji" 'emoji)
("Emoji_Presentation" 'emoji-presentation)
("Emoji_Modifier" 'emoji-modifier)
("Emoji_Modifier_Base" 'emoji-modifier-base)
("Emoji_Component" 'emoji-component)
("Extended_Pictographic" 'emoji-extended-pictographic)))
(define tree (peg:tree (match-pattern @emoji-line line)))
(unless (or (not tree)
@ -39,7 +68,7 @@
(match tree
(((('@codepoint-range
('@codepoint codepoints) ...)
('@emoji-category category))
('@emoji-category cat-str))
('@comment comment))
(with-exception-handler
@ -48,15 +77,14 @@
(format-exception-msg stdout err))
(λ ()
(let ((f (hex-string->integer (first codepoints)))
(l (hex-string->integer (last codepoints))))
(l (hex-string->integer (last codepoints)))
(category (string->category cat-str)))
(when (or (in-surrogate-range f)
(in-surrogate-range l))
(error (format #f "chars in surrogate range ~x -> ~x" f l)))
(when (and (equal? "Extended_Pictographic" category)
(> l #xFF))
(set! emoji-list (cons (list f l) emoji-list)))))
(cons-hash-list! emoji-ht category f l)))
#:unwind? #t)))))
(define line-func
@ -78,27 +106,45 @@
(pretty-print
`(define-module (uniseg emoji)
#:use-module (uniseg internal)
#:use-module (ice-9 hash-table)
#:use-module (srfi srfi-1)
#:export (char-set:extended-pictographic)))
#:export (,@emoji-symbol-names
emoji-charsets)))
(pretty-print
`(define emoji-list ',emoji-list))
`(define emoji-ht
(alist->hashq-table ',(hash-map->list cons emoji-ht))))
(display "\n")
(for-each
(λ (sym)
(pretty-print
`(define ,sym (char-set))))
emoji-symbol-names)
(display "\n")
(pretty-print
`(define char-set:extended-pictographic (char-set)))
`(define emoji-charsets
(list
,@(map
(λ (pair)
(let ((f (first pair))
(s (second pair)))
`(list ',f ,s)))
emoji-sets-and-symbols))))
(display "\n")
(pretty-print
`(for-each
(λ (pair)
(ucs-range->char-set!
(first pair)
(+ 1 (second pair))
#t char-set:extended-pictographic))
emoji-list))
(for-each
(λ (set-pair)
(let ((name (first set-pair))
(symbol (second set-pair)))
(pretty-print
`(ranges->charset! emoji-ht ',name ,symbol))))
emoji-sets-and-symbols)
(display "Code generation complete.\n" stdout)))

View file

@ -42,7 +42,7 @@
extend
regional-indicator
spacing-mark
zerowidth-joiner))
zero-width-joiner))
(define grapheme-symbol-names
(map
@ -71,7 +71,7 @@
("Extend" 'extend)
("Regional_Indicator" 'regional-indicator)
("SpacingMark" 'spacing-mark)
("ZWJ" 'zerowidth-joiner)))
("ZWJ" 'zero-width-joiner)))
(define tree (peg:tree (match-pattern @grapheme-line line)))
@ -123,7 +123,7 @@
#:use-module (ice-9 hash-table)
#:use-module (srfi srfi-1)
#:use-module (uniseg internal)
#:export ( ,@grapheme-symbol-names
#:export (,@grapheme-symbol-names
grapheme-charsets)))

View file

@ -1,17 +1,21 @@
(define-module (uniseg)
#:use-module (srfi srfi-1)
#:use-module (ice-9 match)
#:use-module (srfi srfi-41)
#:use-module (uniseg emoji)
#:use-module (uniseg graphemes)
#:use-module (uniseg graphemes stream)
#:use-module (uniseg eastasian)
#:export (emoji?
grapheme-property
eastasian-property))
char->grapheme-property
char->eastasian-property
char-width
string-width))
(define (emoji? char)
(char-set-contains? char-set:extended-pictographic char))
(char-set-contains? char-set:emoji-extended-pictographic char))
(define (get-prop sets char)
(define (char->prop sets char)
(let ((pair
(find
(λ (p) (char-set-contains? (second p) char))
@ -19,12 +23,12 @@
(and pair
(first pair))))
(define (grapheme-property char)
(define (char->grapheme-property char)
"Find the Unicode grapheme cluster property, as defined by https://www.unicode.org/reports/tr44/"
(or (get-prop grapheme-charsets char)
(or (char->prop grapheme-charsets char)
'other))
(define (eastasian-property char)
(define (char->eastasian-property char)
"Find the Unicode East Asian width property, as defined by https://www.unicode.org/reports/tr11/"
(define (<==> lo hi)
"Create a range predicate, inclusive between two numbers"
@ -47,7 +51,7 @@
(define range-plane-3?
(<==> #x30000 #x3FFFD))
(or (get-prop eastasian-charsets char)
(or (char->prop eastasian-charsets char)
(let ((int (char->integer char)))
(match int
((or (? range-unified-ideographs-ext-a?)
@ -58,3 +62,43 @@
'doublewidth)
(else
'neutral)))))
(define (char-width rune)
"Convert from a character to its visible width. Returns the width as well as the grapheme property to reduce calculations"
(define grapheme-prop (char->grapheme-property rune))
(values
(or
(match grapheme-prop
((or 'control
'carriage-return
'line-feed
'extend
'zero-width-joiner)
0)
('regional-indicator
2)
('extended-pictographic
(if
(char-set-contains? char-set:emoji-presentation rune)
2
1))
(else #f)
)
(match (char->integer rune)
(#x2E3A 3)
(#x2E3B 4)
(else #f))
(match (char->eastasian-property rune)
('doublewidth 2)
;; Default for monospace is to render ambiguous as 1
('ambiguous 1)
(else 1)))
grapheme-prop))
(define (string-width str)
"Get the width of a string by adding up the widths of each grapheme"
(stream-fold
(λ (val grapheme)
(+ val (grapheme-width grapheme)))
0
(string->grapheme-stream str)))

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -20,28 +20,176 @@
char-set:grapheme-extend
char-set:grapheme-regional-indicator
char-set:grapheme-spacing-mark
char-set:grapheme-zerowidth-joiner
char-set:grapheme-zero-width-joiner
grapheme-charsets))
(define grapheme-ht
(alist->hashq-table
'((prepend
(73474 73474)
(73030 73030)
(72324 72329)
(72250 72250)
(72001 72001)
(71999 71999)
(70082 70083)
(69837 69837)
(69821 69821)
(3406 3406)
(2274 2274)
(2192 2193)
(1807 1807)
(1757 1757)
(1536 1541))
(regional-indicator (127462 127487))
(carriage-return (13 13))
'((spacing-mark
(119149 119149)
(119142 119142)
(94192 94193)
(94033 94087)
(73537 73537)
(73534 73535)
(73524 73525)
(73475 73475)
(73461 73462)
(73110 73110)
(73107 73108)
(73098 73102)
(72884 72884)
(72881 72881)
(72873 72873)
(72766 72766)
(72751 72751)
(72343 72343)
(72279 72280)
(72249 72249)
(72164 72164)
(72156 72159)
(72145 72147)
(72002 72002)
(72000 72000)
(71997 71997)
(71991 71992)
(71985 71989)
(71736 71736)
(71724 71726)
(71462 71462)
(71350 71350)
(71342 71343)
(71340 71340)
(71230 71230)
(71227 71228)
(71216 71218)
(71102 71102)
(71096 71099)
(71088 71089)
(70849 70849)
(70846 70846)
(70843 70844)
(70841 70841)
(70833 70834)
(70725 70725)
(70720 70721)
(70709 70711)
(70498 70499)
(70475 70477)
(70471 70472)
(70465 70468)
(70463 70463)
(70402 70403)
(70368 70370)
(70197 70197)
(70194 70195)
(70188 70190)
(70094 70094)
(70079 70080)
(70067 70069)
(70018 70018)
(69957 69958)
(69932 69932)
(69815 69816)
(69808 69810)
(69762 69762)
(69634 69634)
(69632 69632)
(44012 44012)
(44009 44010)
(44006 44007)
(44003 44004)
(43765 43765)
(43758 43759)
(43755 43755)
(43597 43597)
(43571 43572)
(43567 43568)
(43454 43456)
(43450 43451)
(43444 43445)
(43395 43395)
(43346 43347)
(43188 43203)
(43136 43137)
(43047 43047)
(43043 43044)
(7415 7415)
(7393 7393)
(7220 7221)
(7204 7211)
(7154 7155)
(7150 7150)
(7146 7148)
(7143 7143)
(7082 7082)
(7078 7079)
(7073 7073)
(7042 7042)
(6979 6980)
(6973 6977)
(6971 6971)
(6916 6916)
(6765 6770)
(6743 6743)
(6741 6741)
(6681 6682)
(6451 6456)
(6448 6449)
(6441 6443)
(6435 6438)
(6087 6088)
(6078 6085)
(6070 6070)
(5940 5940)
(5909 5909)
(4228 4228)
(4182 4183)
(4155 4156)
(4145 4145)
(3967 3967)
(3902 3903)
(3763 3763)
(3635 3635)
(3570 3571)
(3544 3550)
(3536 3537)
(3458 3459)
(3402 3404)
(3398 3400)
(3391 3392)
(3330 3331)
(3315 3315)
(3274 3275)
(3271 3272)
(3267 3268)
(3264 3265)
(3262 3262)
(3202 3203)
(3137 3140)
(3073 3075)
(3018 3020)
(3014 3016)
(3009 3010)
(3007 3007)
(2891 2892)
(2887 2888)
(2880 2880)
(2818 2819)
(2763 2764)
(2761 2761)
(2750 2752)
(2691 2691)
(2622 2624)
(2563 2563)
(2507 2508)
(2503 2504)
(2495 2496)
(2434 2435)
(2382 2383)
(2377 2380)
(2366 2368)
(2363 2363)
(2307 2307))
(extend
(917760 917999)
(917536 917631)
@ -420,34 +568,6 @@
(1160 1161)
(1155 1159)
(768 879))
(hangul-syllable-t (55243 55291) (4520 4607))
(control
(918000 921599)
(917632 917759)
(917506 917535)
(917505 917505)
(917504 917504)
(119155 119162)
(113824 113827)
(78896 78911)
(65529 65531)
(65520 65528)
(65279 65279)
(8294 8303)
(8293 8293)
(8288 8292)
(8234 8238)
(8233 8233)
(8232 8232)
(8206 8207)
(8203 8203)
(6158 6158)
(1564 1564)
(173 173)
(127 159)
(14 31)
(11 12)
(0 9))
(hangul-syllable-lv
(55176 55176)
(55148 55148)
@ -848,174 +968,6 @@
(44088 44088)
(44060 44060)
(44032 44032))
(line-feed (10 10))
(hangul-syllable-v (55216 55238) (4448 4519))
(spacing-mark
(119149 119149)
(119142 119142)
(94192 94193)
(94033 94087)
(73537 73537)
(73534 73535)
(73524 73525)
(73475 73475)
(73461 73462)
(73110 73110)
(73107 73108)
(73098 73102)
(72884 72884)
(72881 72881)
(72873 72873)
(72766 72766)
(72751 72751)
(72343 72343)
(72279 72280)
(72249 72249)
(72164 72164)
(72156 72159)
(72145 72147)
(72002 72002)
(72000 72000)
(71997 71997)
(71991 71992)
(71985 71989)
(71736 71736)
(71724 71726)
(71462 71462)
(71350 71350)
(71342 71343)
(71340 71340)
(71230 71230)
(71227 71228)
(71216 71218)
(71102 71102)
(71096 71099)
(71088 71089)
(70849 70849)
(70846 70846)
(70843 70844)
(70841 70841)
(70833 70834)
(70725 70725)
(70720 70721)
(70709 70711)
(70498 70499)
(70475 70477)
(70471 70472)
(70465 70468)
(70463 70463)
(70402 70403)
(70368 70370)
(70197 70197)
(70194 70195)
(70188 70190)
(70094 70094)
(70079 70080)
(70067 70069)
(70018 70018)
(69957 69958)
(69932 69932)
(69815 69816)
(69808 69810)
(69762 69762)
(69634 69634)
(69632 69632)
(44012 44012)
(44009 44010)
(44006 44007)
(44003 44004)
(43765 43765)
(43758 43759)
(43755 43755)
(43597 43597)
(43571 43572)
(43567 43568)
(43454 43456)
(43450 43451)
(43444 43445)
(43395 43395)
(43346 43347)
(43188 43203)
(43136 43137)
(43047 43047)
(43043 43044)
(7415 7415)
(7393 7393)
(7220 7221)
(7204 7211)
(7154 7155)
(7150 7150)
(7146 7148)
(7143 7143)
(7082 7082)
(7078 7079)
(7073 7073)
(7042 7042)
(6979 6980)
(6973 6977)
(6971 6971)
(6916 6916)
(6765 6770)
(6743 6743)
(6741 6741)
(6681 6682)
(6451 6456)
(6448 6449)
(6441 6443)
(6435 6438)
(6087 6088)
(6078 6085)
(6070 6070)
(5940 5940)
(5909 5909)
(4228 4228)
(4182 4183)
(4155 4156)
(4145 4145)
(3967 3967)
(3902 3903)
(3763 3763)
(3635 3635)
(3570 3571)
(3544 3550)
(3536 3537)
(3458 3459)
(3402 3404)
(3398 3400)
(3391 3392)
(3330 3331)
(3315 3315)
(3274 3275)
(3271 3272)
(3267 3268)
(3264 3265)
(3262 3262)
(3202 3203)
(3137 3140)
(3073 3075)
(3018 3020)
(3014 3016)
(3009 3010)
(3007 3007)
(2891 2892)
(2887 2888)
(2880 2880)
(2818 2819)
(2763 2764)
(2761 2761)
(2750 2752)
(2691 2691)
(2622 2624)
(2563 2563)
(2507 2508)
(2503 2504)
(2495 2496)
(2434 2435)
(2382 2383)
(2377 2380)
(2366 2368)
(2363 2363)
(2307 2307))
(hangul-syllable-lvt
(55177 55203)
(55149 55175)
@ -1417,7 +1369,55 @@
(44061 44087)
(44033 44059))
(hangul-syllable-l (43360 43388) (4352 4447))
(zerowidth-joiner (8205 8205)))))
(hangul-syllable-v (55216 55238) (4448 4519))
(prepend
(73474 73474)
(73030 73030)
(72324 72329)
(72250 72250)
(72001 72001)
(71999 71999)
(70082 70083)
(69837 69837)
(69821 69821)
(3406 3406)
(2274 2274)
(2192 2193)
(1807 1807)
(1757 1757)
(1536 1541))
(regional-indicator (127462 127487))
(line-feed (10 10))
(control
(918000 921599)
(917632 917759)
(917506 917535)
(917505 917505)
(917504 917504)
(119155 119162)
(113824 113827)
(78896 78911)
(65529 65531)
(65520 65528)
(65279 65279)
(8294 8303)
(8293 8293)
(8288 8292)
(8234 8238)
(8233 8233)
(8232 8232)
(8206 8207)
(8203 8203)
(6158 6158)
(1564 1564)
(173 173)
(127 159)
(14 31)
(11 12)
(0 9))
(zero-width-joiner (8205 8205))
(carriage-return (13 13))
(hangul-syllable-t (55243 55291) (4520 4607)))))
(define char-set:grapheme-hangul-syllable-l
(char-set))
@ -1437,7 +1437,7 @@
(char-set))
(define char-set:grapheme-spacing-mark
(char-set))
(define char-set:grapheme-zerowidth-joiner
(define char-set:grapheme-zero-width-joiner
(char-set))
(define grapheme-charsets
@ -1459,8 +1459,8 @@
char-set:grapheme-regional-indicator)
(list 'spacing-mark
char-set:grapheme-spacing-mark)
(list 'zerowidth-joiner
char-set:grapheme-zerowidth-joiner)))
(list 'zero-width-joiner
char-set:grapheme-zero-width-joiner)))
(ranges->charset!
grapheme-ht
@ -1508,5 +1508,5 @@
char-set:grapheme-spacing-mark)
(ranges->charset!
grapheme-ht
'zerowidth-joiner
char-set:grapheme-zerowidth-joiner)
'zero-width-joiner
char-set:grapheme-zero-width-joiner)

View file

@ -1,5 +1,8 @@
(define-module (runewidth graphemes stream)
(define-module (uniseg graphemes stream)
#:use-module (uniseg)
#:use-module (ice-9 textual-ports)
#:use-module (ice-9 match)
#:use-module (srfi srfi-71)
#:use-module (srfi srfi-41)
#:use-module (srfi srfi-9 gnu)
#:export (make-grapheme
@ -8,20 +11,23 @@
grapheme-width
grapheme-sentence-end?
grapheme-word-end?
string->grapheme-stream
input->grapheme-stream))
;; TODO: uniseg also does word and sentence boundaries. These state machines could be implemented if we wanted to.
(define-immutable-record-type <grapheme>
(make-grapheme glyphs width sentence-end? word-end?)
(make-grapheme glyphs width)
grapheme?
(glyphs grapheme-glyphs)
(width grapheme-width)
(sentence-end? grapheme-sentence-end?)
(word-end? grapheme-word-end?))
(width grapheme-width))
(define (string->grapheme-stream str)
"Given a string, create a (lazy) stream of graphemes."
(with-input-from-string str
(λ () (input->grapheme-stream (current-input-port)))))
(define-stream (input->grapheme-stream port)
(unless port
(set! port (current-input-port)))
"Given an input port, create a (lazy) stream of graphemes."
;; The first is what state we are in, and the next is the grapheme
;; property of the current character.
;; Port of https://github.com/rivo/uniseg/blob/master/graphemerules.go
@ -29,88 +35,115 @@
(match (list cur-state cur-prop)
;; Grapheme boundary #3
(('carriage-return 'line-feed)
(values 'control+line-feed 'no-boundary))
(values 'control+line-feed #f))
;; Grapheme boundary #4
((or ('carriage-return _)
('control+line-feed _))
(values 'any 'boundary))
(values 'any #t))
;; Grapheme boundary #5
((_ 'carriage-return)
(values 'carriage-return 'boundary))
(values 'carriage-return #t))
((or (_ 'line-feed)
(_ 'control))
(values 'control+line-feed 'boundary))
(values 'control+line-feed #t))
;; Grapheme boundary #6
(('hangul-syllable-l 'hangul-syllable-l)
(values 'hangul-syllable-l 'no-boundary))
(values 'hangul-syllable-l #f))
((or ('hangul-syllable-l 'hangul-syllable-v)
('hangul-syllable-l 'hangul-syllable-lv))
(values 'hangul-syllable-lv 'no-boundary))
(values 'hangul-syllable-lv #f))
(('hangul-syllable-l 'hangul-syllable-lvt)
(values 'hangul-syllable-lvt 'no-boundary))
(values 'hangul-syllable-lvt #f))
((_ 'hangul-syllable-l)
(values 'hangul-syllable-l 'boundary))
(values 'hangul-syllable-l #t))
;; Grapheme boundary #7
(('hangul-syllable-lv 'hangul-syllable-v)
(values 'hangul-syllable-lv 'no-boundary))
(values 'hangul-syllable-lv #f))
(('hangul-syllable-lv 'hangul-syllable-t)
(values 'hangul-syllable-lvt 'no-boundary))
(values 'hangul-syllable-lvt #f))
((or (_ 'hangul-syllable-lv)
(_ 'hangul-syllable-v))
(values 'hangul-syllable-lv 'boundary))
(values 'hangul-syllable-lv #t))
;; Grapheme boundary #8
((or (_ 'hangul-syllable-lvt)
(_ 'hangul-syllable-t))
(values 'hangul-syllable-lvt 'boundary))
(values 'hangul-syllable-lvt #t))
(('hangul-syllable-lvt 'hangul-syllable-t)
(values 'hangul-syllable-lvt 'no-boundary))
(values 'hangul-syllable-lvt #f))
;; Grapheme boundary #9
((or (_ 'extend)
(_ 'zero-width-joiner))
(values 'any 'no-boundary))
(values 'any #f))
;; Grapheme boundary #9a
((_ 'spacing-mark)
(values 'any 'no-boundary))
(values 'any #f))
;; Grapheme boundary #9b
(('prepend _)
(values 'any 'no-boundary))
(values 'any #f))
((_ 'prepend)
(values 'prepend 'boundary))
(values 'prepend #t))
;; Grapheme boundary #11 (emoji!)
(('extended-pictographic 'extend)
(values 'extended-pictographic 'no-boundary))
(values 'extended-pictographic #f))
(('extended-pictographic 'zero-width-joiner)
(values 'extended-pictographic+zero-width-joiner 'no-boundary))
(values 'extended-pictographic+zero-width-joiner #f))
(('extended-pictographic+zero-width-joiner 'extended-pictographic)
(values 'extended-pictographic 'no-boundary))
(values 'extended-pictographic #f))
((_ 'extended-pictographic)
(values 'extended-pictographic 'boundary))
(values 'extended-pictographic #t))
;; Grapheme boundaries #12 and #13
(('regional-indicator-odd 'regional-indicator)
(values 'regioinal-indicator-even 'no-boundary))
(values 'regioinal-indicator-even #f))
(('regional-indicator-even 'regional-indicator)
(values 'regional-indicator-odd 'boundary))
(values 'regional-indicator-odd #t))
((_ 'regional-indicator)
(values 'regional-indicator-odd 'boundary))
(values 'regional-indicator-odd #t))
(else (values 'other 'other))))
(define (transition-state state char)
"Given the current state and the next char, run a state transition"
;; Everything else considered a boundeary
(else (values 'any #t))))
)
(define grapheme-width 0)
(define glyphs-reverse '())
(define grapheme (make-grapheme glyphs width sentence-end? word-end?))
(stream-cons grapheme (input->grapheme-stream port)))
(define hit-eof #f)
(define (iterate-through-grapheme state)
(define glyph (get-char port))
(if
(eof-object? glyph)
(begin
(set! hit-eof #t)
state)
(begin
(set! glyphs-reverse (cons glyph glyphs-reverse))
(let* ((width property (char-width glyph))
(next-state boundary? (state-machine state property)))
(set! grapheme-width (+ grapheme-width width))
(if boundary?
state
(iterate-through-grapheme state))))))
(define final-state (iterate-through-grapheme 'any))
(if
hit-eof
stream-null
(stream-cons
(make-grapheme
(reverse glyphs-reverse)
grapheme-width)
(input->grapheme-stream port))))

View file

@ -21,6 +21,9 @@
file-to-lines
ranges->charset!))
;;
;; Common PEG patterns
;;
(define-peg-pattern @hex body (peg "[a-fA-F0-9]"))
(define-peg-pattern @codepoint all
@ -31,13 +34,14 @@
(and @codepoint (ignore "..") @codepoint)
@codepoint))
(define-peg-pattern @comment all
(and (ignore "#") (* peg-any)))
(define-peg-pattern @ws none
(or " " "\t"))
;; Helper macro to add a list of character ranges
;; to a hash-set.
(define-syntax-rule (cons-hash-list! ht key low high)
(let* ((old (hashq-ref ht key))
(value (list low high))
@ -47,6 +51,8 @@
(list value))))
(hashq-set! ht key new-lst)))
;; Helper macro to take above character range hash table
;; and insert it into Guile's chasracter set system.
(define-syntax-rule (ranges->charset! ht name symbol)
(let* ((pairs (hashq-ref ht name)))
(for-each
@ -64,15 +70,17 @@
;; So they can't be written out as such.
(locale-string->integer str 16))
;; This should be in the standard library...
(define (format-exception-msg port err)
(apply format port (exception-message err) (exception-irritants err))
(display "\n" port))
;; Little checker to see if we are in the danger zone
;; (surrogate range is invalid unicode and guile errors)
(define (in-surrogate-range num)
(and (>= num #xd800)
(<= num #xdfff)))
(define* (wget-to-lines url #:optional (port #f))
(format port "Downloading from ~a..." url)