;;; guix/guix/build/po.scm
;;;
;;; 194 lines
;;; 7.2 KiB
;;; Scheme

;;; GNU Guix --- Functional package management for GNU
;;; Copyright © 2019, 2021 Julien Lepiller <julien@lepiller.eu>
;;; Copyright © 2020 Ludovic Courtès <ludo@gnu.org>
;;; Copyright © 2023 Florian Pelz <pelzflorian@pelzflorian.de>
;;;
;;; This file is part of GNU Guix.
;;;
;;; GNU Guix is free software; you can redistribute it and/or modify it
;;; under the terms of the GNU General Public License as published by
;;; the Free Software Foundation; either version 3 of the License, or (at
;;; your option) any later version.
;;;
;;; GNU Guix is distributed in the hope that it will be useful, but
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;;; GNU General Public License for more details.
;;;
;;; You should have received a copy of the GNU General Public License
;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
(define-module (guix build po)
#:use-module (ice-9 match)
#:use-module (ice-9 peg)
#:use-module (ice-9 regex)
#:use-module (ice-9 textual-ports)
#:use-module (ice-9 vlist)
#:use-module (srfi srfi-1)
#:export (read-po-file
translate-cross-references))
;; A small parser for po files
;;
;; The grammar is written with (ice-9 peg).  The second operand of each
;; 'define-peg-pattern' is the capture type: 'body' patterns contribute only
;; their matched text to the enclosing node, 'none' patterns are matched but
;; contribute nothing, and 'all' patterns produce a node tagged with the
;; pattern name (this is what 'parse-tree->assoc' destructures, e.g.
;; ('flags ...) and ('entry ...)).

;; A whole file: any sequence of entries and stray whitespace characters.
(define-peg-pattern po-file body (* (or entry whitespace)))
;; A single space, tab or newline.
(define-peg-pattern whitespace body (or " " "\t" "\n"))
;; Any character allowed on a comment line: everything from #\space up to
;; #\頋.  Newline sorts below #\space, so it is excluded and ends the comment;
;; characters above the upper bound are not matched either.
(define-peg-pattern comment-chr body (range #\space #\頋))
;; A "#..." line up to and including the newline; nothing is captured.
(define-peg-pattern comment none (and "#" (* comment-chr) "\n"))
;; A "#, ..." flag line.  The "#, " prefix and the newline are dropped, so the
;; node only carries the comma-separated flag text.
(define-peg-pattern flags all (and (ignore "#, ") (* comment-chr) (ignore "\n")))
;; One entry: any mix of flag lines, comment lines and whitespace, followed
;; by "msgid " and its string, then "msgstr " and its string.  'flags' is
;; listed before 'comment' so that "#, " lines are tried as flags first.
(define-peg-pattern entry all
(and (* (or flags comment (ignore (* whitespace))))
(ignore "msgid ") msgid (ignore (* whitespace))
(ignore "msgstr ") msgstr))
;; The three escape sequences (backslash-backslash, backslash-quote,
;; backslash-n).  NOTE(review): no other pattern shown in this file refers to
;; 'escape'; it looks unused -- confirm before removing.
(define-peg-pattern escape body (or "\\\\" "\\\"" "\\n"))
;; One character of string content.  Alternatives are tried in order:
;;   - a space or "!" (the two characters that sort below the double quote);
;;   - an escaped double quote, captured without its backslash;
;;   - the two characters backslash + n, captured unchanged (they are turned
;;     into a newline later by 'interpret-newline-escape');
;;   - an escaped backslash, captured as a single backslash;
;;   - any character from "#" up to #\頋.
;; The double quote itself falls between "!" and "#", so an unescaped quote
;; matches none of the alternatives and terminates the string.
(define-peg-pattern str-chr body (or " " "!" (and (ignore "\\") "\"")
"\\n" (and (ignore "\\") "\\")
(range #\# #\頋)))
;; The msgid and msgstr nodes both wrap the same quoted-string syntax.
(define-peg-pattern msgid all content)
(define-peg-pattern msgstr all content)
;; A double-quoted string, optionally followed (after whitespace) by further
;; quoted strings.  The quotes and the whitespace in between are ignored,
;; which is how multi-line PO strings are joined.
(define-peg-pattern content body
(and (ignore "\"") (* str-chr) (ignore "\"")
(? (and (ignore (* whitespace)) content))))
(define (interpret-newline-escape str)
  "Return STR with every two-character '\\n' sequence (a backslash followed by
the letter n) replaced by a single newline character."
  (let scan ((remaining str)
             (pieces '()))              ;chunks already processed, newest first
    (let ((pos (string-contains remaining "\\n")))
      (if pos
          ;; Keep the text before the escape, emit a real newline, and carry
          ;; on after the two escape characters.
          (scan (string-drop remaining (+ pos 2))
                (cons* "\n" (string-take remaining pos) pieces))
          (string-concatenate-reverse (cons remaining pieces))))))
(define (parse-tree->assoc parse-tree)
  "Convert a po PARSE-TREE to an association list, where the key is the msgid
and the value is the msgstr.  Entries with an empty msgid or an empty msgstr
and entries flagged as fuzzy are left out.  '\\n' escape sequences in the
remaining msgids and msgstrs are replaced with newline characters."
  (define (comments->flags comments)
    ;; Return the list of flag symbols found in COMMENTS, which is either a
    ;; single ('flags "a, b") node or a list mixing such nodes with strings.
    ;; The order of the result is unspecified; it is only used for 'member'.
    (match comments
      (('flags flags)
       (map (lambda (flag) (string->symbol (string-trim-both flag #\space)))
            (string-split flags #\,)))
      ((? list? comments)
       (fold (lambda (comment res)
               (match comment
                 ((? string? _) res)
                 (flags
                  (append (comments->flags flags) res))))
             '()
             comments))))

  (define (translation msgid msgstr)
    ;; Build the alist pair for one usable entry.
    (cons (interpret-newline-escape msgid)
          (interpret-newline-escape msgstr)))

  (define (entry->pair entry)
    ;; Return the (msgid . msgstr) pair for ENTRY, or #f when ENTRY must not
    ;; appear in the result.
    (match entry
      ;; Bare strings between entries carry no translation.
      ((? string? _) #f)
      ;; empty msgid
      ;; NOTE(review): presumably this nested shape is what the PEG parser
      ;; yields when the msgid string is empty -- confirm against the grammar.
      (('entry ('msgid ('msgstr _))) #f)
      ;; empty msgstr, without and with leading comments/flags
      (('entry ('msgid _) 'msgstr) #f)
      (('entry _ ('msgid _) 'msgstr) #f)
      ;; Entry without comments or flags: always kept.
      (('entry ('msgid msgid) ('msgstr msgstr))
       (translation msgid msgstr))
      ;; Entry with comments and/or flags: kept unless marked fuzzy.
      (('entry comments ('msgid msgid) ('msgstr msgstr))
       (and (not (member 'fuzzy (comments->flags comments)))
            (translation msgid msgstr)))))

  ;; 'filter-map' keeps the entries in file order and drops the #f results.
  (filter-map entry->pair parse-tree))
(define (read-po-file port)
  "Parse the .po data available on PORT and return it as an alist that maps
each msgid to its msgstr."
  (let* ((text   (get-string-all port))          ;slurp the whole port
         (parsed (match-pattern po-file text)))  ;run the PEG grammar
    (parse-tree->assoc (peg:tree parsed))))
(define (canonicalize-whitespace str)
  "Return a copy of STR in which every whitespace character (newline, tab,
etc.) has been replaced by @code{#\\space}."
  (define (blank? chr)
    (char-set-contains? char-set:whitespace chr))

  (define (normalize chr)
    (if (blank? chr) #\space chr))

  (string-map normalize str))
(define xref-regexp
;; Texinfo cross-reference regexp.
;;
;; It matches "@ref{", "@xref{" and "@pxref{" commands and defines three
;; groups, which 'translate-cross-references' reads by number:
;;   1. the optional "px" or "x" prefix (absent for a plain "@ref");
;;   2. the first argument: one or more characters other than "," and "}";
;;   3. everything after the first argument, up to the end of the match.
(make-regexp
(string-append "@(px|x)?ref\\{([^,}]+)("
"\\}" ;Match xref with one argument
"|,[^,}]*\\}" ;or two arguments
"|,[^,}]*,[^,}]*\\}" ;or three arguments
;; or with an *empty* fourth argument:
;; The first alternative below ends on the comma that follows the empty
;; fourth argument rather than on "}", so whatever comes after that comma
;; is left outside the match; the second ends on the closing brace.
"|,[^,}]*,[^,}]*, *,"
"|,[^,}]*,[^,}]*, *\\}"
")")))
(define (translate-cross-references texi pofile)
  "Translate the cross-references that appear in @var{texi}, the initial
translation of a Texinfo file, using the msgid/msgstr pairs from @var{pofile}.
The file @var{texi} is overwritten with the result, and the number of
cross-references found is reported on the current error port."
  (define translations
    (call-with-input-file pofile read-po-file))

  (define content
    (call-with-input-file texi get-string-all))

  (define matches
    (list-matches xref-regexp content))

  (define translation-map
    ;; Index TRANSLATIONS by msgid so that each cross-reference lookup does
    ;; not have to walk the whole alist.
    (fold (match-lambda*
            (((msgid . str) result)
             (vhash-cons msgid str result)))
          vlist-null
          translations))

  (define (translate-xref xref)
    ;; Return the replacement text for XREF, a match of XREF-REGEXP.  The
    ;; node name is looked up with its whitespace canonicalized, since a
    ;; reference can be wrapped over several lines; when there is no
    ;; translation the canonicalized name is emitted as is.
    (let ((prefix (match:substring xref 1))   ;"px", "x", or #f for "@ref"
          (ref    (canonicalize-whitespace (match:substring xref 2)))
          (rest   (match:substring xref 3)))  ;remaining arguments and brace
      (string-append "@" (or prefix "")
                     "ref{"
                     (match (vhash-assoc ref translation-map)
                       (#f ref)
                       ((_ . str) str))
                     (or rest ""))))

  (define translated
    ;; Iterate over MATCHES and replace cross-references with their
    ;; translation found in TRANSLATION-MAP.  (We can't use
    ;; 'substitute*' because matches can span multiple lines.)
    ;;
    ;; OFFSET is the position in CONTENT just after the previous match and
    ;; RESULT holds the output chunks in reverse order.  Each untouched
    ;; stretch is extracted directly with 'substring' instead of first
    ;; copying the whole remainder of CONTENT for every match.
    (let loop ((remaining matches)
               (offset 0)
               (result '()))
      (match remaining
        (()
         (string-concatenate-reverse
          (cons (substring content offset) result)))
        ((head . tail)
         (loop tail
               (match:end head)
               (cons* (translate-xref head)
                      (substring content offset (match:start head))
                      result))))))

  (format (current-error-port)
          "translated ~a cross-references in '~a'~%"
          (length matches) texi)
  (call-with-output-file texi
    (lambda (port)
      (display translated port))))