gnu: libxml2: Update to 2.9.12.

* gnu/packages/patches/libxml2-parent-pointers.patch,
gnu/packages/patches/libxml2-terminating-newline.patch,
gnu/packages/patches/libxml2-xpath-recursion-limit.patch: New files.
* gnu/packages/patches/python-libxml2-python39-compat.patch: Delete file.
* gnu/local.mk (dist_patch_DATA): Adjust accordingly.
* gnu/packages/xml.scm (libxml2): Update to 2.9.12.
(python-lxml)[source](modules, snippet): New fields.
This commit is contained in:
Marius Bakke 2021-07-19 14:11:12 +02:00
parent 11fb5f45a8
commit abe5ffbbfe
No known key found for this signature in database
GPG key ID: A2A06DF2A33A54FA
6 changed files with 300 additions and 104 deletions

View file

@@ -1356,6 +1356,9 @@ dist_patch_DATA = \
%D%/packages/patches/libutils-remove-damaging-includes.patch \
%D%/packages/patches/libvdpau-va-gl-unbundle.patch \
%D%/packages/patches/libvpx-CVE-2016-2818.patch \
%D%/packages/patches/libxml2-parent-pointers.patch \
%D%/packages/patches/libxml2-terminating-newline.patch \
%D%/packages/patches/libxml2-xpath-recursion-limit.patch \
%D%/packages/patches/libxml2-xpath0-Add-option-xpath0.patch \
%D%/packages/patches/libxslt-generated-ids.patch \
%D%/packages/patches/libxt-guix-search-paths.patch \
@@ -1587,7 +1590,6 @@ dist_patch_DATA = \
%D%/packages/patches/python-pep8-stdlib-tokenize-compat.patch \
%D%/packages/patches/python-pyfakefs-remove-bad-test.patch \
%D%/packages/patches/python-flint-includes.patch \
%D%/packages/patches/python-libxml2-python39-compat.patch \
%D%/packages/patches/python-libxml2-utf8.patch \
%D%/packages/patches/python-matplotlib-run-under-wayland-gtk3.patch \
%D%/packages/patches/python-mediafile-wavpack.patch \

View file

@@ -0,0 +1,228 @@
Fix a regression in 2.9.12 where some corrupt XML structures were handled
incorrectly:
https://gitlab.gnome.org/GNOME/libxml2/-/issues/255
This is an amalgamation of these upstream commits:
https://gitlab.gnome.org/GNOME/libxml2/-/commit/85b1792e37b131e7a51af98a37f92472e8de5f3f
https://gitlab.gnome.org/GNOME/libxml2/-/commit/13ad8736d294536da4cbcd70a96b0a2fbf47070c
diff --git a/HTMLtree.c b/HTMLtree.c
--- a/HTMLtree.c
+++ b/HTMLtree.c
@@ -744,7 +744,7 @@ void
htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
xmlNodePtr cur, const char *encoding ATTRIBUTE_UNUSED,
int format) {
- xmlNodePtr root;
+ xmlNodePtr root, parent;
xmlAttrPtr attr;
const htmlElemDesc * info;
@@ -755,6 +755,7 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
}
root = cur;
+ parent = cur->parent;
while (1) {
switch (cur->type) {
case XML_HTML_DOCUMENT_NODE:
@@ -762,13 +763,25 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
if (((xmlDocPtr) cur)->intSubset != NULL) {
htmlDtdDumpOutput(buf, (xmlDocPtr) cur, NULL);
}
- if (cur->children != NULL) {
+ /* Always validate cur->parent when descending. */
+ if ((cur->parent == parent) && (cur->children != NULL)) {
+ parent = cur;
cur = cur->children;
continue;
}
break;
case XML_ELEMENT_NODE:
+ /*
+ * Some users like lxml are known to pass nodes with a corrupted
+ * tree structure. Fall back to a recursive call to handle this
+ * case.
+ */
+ if ((cur->parent != parent) && (cur->children != NULL)) {
+ htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
+ break;
+ }
+
/*
* Get specific HTML info for that node.
*/
@@ -817,6 +830,7 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
(cur->name != NULL) &&
(cur->name[0] != 'p')) /* p, pre, param */
xmlOutputBufferWriteString(buf, "\n");
+ parent = cur;
cur = cur->children;
continue;
}
@@ -825,9 +839,9 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
(info != NULL) && (!info->isinline)) {
if ((cur->next->type != HTML_TEXT_NODE) &&
(cur->next->type != HTML_ENTITY_REF_NODE) &&
- (cur->parent != NULL) &&
- (cur->parent->name != NULL) &&
- (cur->parent->name[0] != 'p')) /* p, pre, param */
+ (parent != NULL) &&
+ (parent->name != NULL) &&
+ (parent->name[0] != 'p')) /* p, pre, param */
xmlOutputBufferWriteString(buf, "\n");
}
@@ -842,9 +856,9 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
break;
if (((cur->name == (const xmlChar *)xmlStringText) ||
(cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
- ((cur->parent == NULL) ||
- ((xmlStrcasecmp(cur->parent->name, BAD_CAST "script")) &&
- (xmlStrcasecmp(cur->parent->name, BAD_CAST "style"))))) {
+ ((parent == NULL) ||
+ ((xmlStrcasecmp(parent->name, BAD_CAST "script")) &&
+ (xmlStrcasecmp(parent->name, BAD_CAST "style"))))) {
xmlChar *buffer;
buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
@@ -902,13 +916,9 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
break;
}
- /*
- * The parent should never be NULL here but we want to handle
- * corrupted documents gracefully.
- */
- if (cur->parent == NULL)
- return;
- cur = cur->parent;
+ cur = parent;
+ /* cur->parent was validated when descending. */
+ parent = cur->parent;
if ((cur->type == XML_HTML_DOCUMENT_NODE) ||
(cur->type == XML_DOCUMENT_NODE)) {
@@ -939,9 +949,9 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
(cur->next != NULL)) {
if ((cur->next->type != HTML_TEXT_NODE) &&
(cur->next->type != HTML_ENTITY_REF_NODE) &&
- (cur->parent != NULL) &&
- (cur->parent->name != NULL) &&
- (cur->parent->name[0] != 'p')) /* p, pre, param */
+ (parent != NULL) &&
+ (parent->name != NULL) &&
+ (parent->name[0] != 'p')) /* p, pre, param */
xmlOutputBufferWriteString(buf, "\n");
}
}
diff --git a/xmlsave.c b/xmlsave.c
--- a/xmlsave.c
+++ b/xmlsave.c
@@ -847,7 +847,7 @@ htmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
static void
xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
int format = ctxt->format;
- xmlNodePtr tmp, root, unformattedNode = NULL;
+ xmlNodePtr tmp, root, unformattedNode = NULL, parent;
xmlAttrPtr attr;
xmlChar *start, *end;
xmlOutputBufferPtr buf;
@@ -856,6 +856,7 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
buf = ctxt->buf;
root = cur;
+ parent = cur->parent;
while (1) {
switch (cur->type) {
case XML_DOCUMENT_NODE:
@@ -868,7 +869,9 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
break;
case XML_DOCUMENT_FRAG_NODE:
- if (cur->children != NULL) {
+ /* Always validate cur->parent when descending. */
+ if ((cur->parent == parent) && (cur->children != NULL)) {
+ parent = cur;
cur = cur->children;
continue;
}
@@ -887,7 +890,18 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
break;
case XML_ELEMENT_NODE:
- if ((cur != root) && (ctxt->format == 1) && (xmlIndentTreeOutput))
+ /*
+ * Some users like lxml are known to pass nodes with a corrupted
+ * tree structure. Fall back to a recursive call to handle this
+ * case.
+ */
+ if ((cur->parent != parent) && (cur->children != NULL)) {
+ xmlNodeDumpOutputInternal(ctxt, cur);
+ break;
+ }
+
+ if ((ctxt->level > 0) && (ctxt->format == 1) &&
+ (xmlIndentTreeOutput))
xmlOutputBufferWrite(buf, ctxt->indent_size *
(ctxt->level > ctxt->indent_nr ?
ctxt->indent_nr : ctxt->level),
@@ -942,6 +956,7 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
xmlOutputBufferWrite(buf, 1, ">");
if (ctxt->format == 1) xmlOutputBufferWrite(buf, 1, "\n");
if (ctxt->level >= 0) ctxt->level++;
+ parent = cur;
cur = cur->children;
continue;
}
@@ -1058,13 +1073,9 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
break;
}
- /*
- * The parent should never be NULL here but we want to handle
- * corrupted documents gracefully.
- */
- if (cur->parent == NULL)
- return;
- cur = cur->parent;
+ cur = parent;
+ /* cur->parent was validated when descending. */
+ parent = cur->parent;
if (cur->type == XML_ELEMENT_NODE) {
if (ctxt->level > 0) ctxt->level--;
diff --git a/xmlsave.c b/xmlsave.c
--- a/xmlsave.c
+++ b/xmlsave.c
@@ -890,6 +890,13 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
break;
case XML_ELEMENT_NODE:
+ if ((cur != root) && (ctxt->format == 1) &&
+ (xmlIndentTreeOutput))
+ xmlOutputBufferWrite(buf, ctxt->indent_size *
+ (ctxt->level > ctxt->indent_nr ?
+ ctxt->indent_nr : ctxt->level),
+ ctxt->indent);
+
/*
* Some users like lxml are known to pass nodes with a corrupted
* tree structure. Fall back to a recursive call to handle this
@@ -900,13 +907,6 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
break;
}
- if ((ctxt->level > 0) && (ctxt->format == 1) &&
- (xmlIndentTreeOutput))
- xmlOutputBufferWrite(buf, ctxt->indent_size *
- (ctxt->level > ctxt->indent_nr ?
- ctxt->indent_nr : ctxt->level),
- ctxt->indent);
-
xmlOutputBufferWrite(buf, 1, "<");
if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);

View file

@@ -0,0 +1,33 @@
Fix a regression in 2.9.12 where serializing empty HTML documents would
not add a terminating newline.
https://gitlab.gnome.org/GNOME/libxml2/-/issues/266
Taken from upstream:
https://gitlab.gnome.org/GNOME/libxml2/-/commit/92d9ab4c28842a09ca2b76d3ff2f933e01b6cd6f
diff --git a/HTMLtree.c b/HTMLtree.c
--- a/HTMLtree.c
+++ b/HTMLtree.c
@@ -763,11 +763,15 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
if (((xmlDocPtr) cur)->intSubset != NULL) {
htmlDtdDumpOutput(buf, (xmlDocPtr) cur, NULL);
}
- /* Always validate cur->parent when descending. */
- if ((cur->parent == parent) && (cur->children != NULL)) {
- parent = cur;
- cur = cur->children;
- continue;
+ if (cur->children != NULL) {
+ /* Always validate cur->parent when descending. */
+ if (cur->parent == parent) {
+ parent = cur;
+ cur = cur->children;
+ continue;
+ }
+ } else {
+ xmlOutputBufferWriteString(buf, "\n");
}
break;

View file

@@ -0,0 +1,20 @@
Fix recursion accounting in XPath expressions:
https://gitlab.gnome.org/GNOME/libxml2/-/issues/264
Taken from upstream:
https://gitlab.gnome.org/GNOME/libxml2/-/commit/3e1aad4fe584747fd7d17cc7b2863a78e2d21a77
diff --git a/xpath.c b/xpath.c
--- a/xpath.c
+++ b/xpath.c
@@ -10983,7 +10983,7 @@ xmlXPathCompileExpr(xmlXPathParserContextPtr ctxt, int sort) {
}
if (xpctxt != NULL)
- xpctxt->depth -= 1;
+ xpctxt->depth -= 10;
}
/**

View file

@@ -1,94 +0,0 @@
https://gitlab.gnome.org/GNOME/libxml2/-/commit/e4fb36841800038c289997432ca547c9bfef9db1.patch
From e4fb36841800038c289997432ca547c9bfef9db1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Miro=20Hron=C4=8Dok?= <miro@hroncok.cz>
Date: Fri, 28 Feb 2020 12:48:14 +0100
Subject: [PATCH] Parenthesize Py<type>_Check() in ifs
In C, if expressions should be parenthesized.
PyLong_Check, PyUnicode_Check etc. happened to expand to a parenthesized
expression before, but that's not API to rely on.
Since Python 3.9.0a4 it needs to be parenthesized explicitly.
Fixes https://gitlab.gnome.org/GNOME/libxml2/issues/149
---
python/libxml.c | 4 ++--
python/types.c | 12 ++++++------
2 files changed, 8 insertions(+), 8 deletions(-)
diff --git a/python/libxml.c b/python/libxml.c
index bc676c4e..81e709f3 100644
--- a/python/libxml.c
+++ b/python/libxml.c
@@ -294,7 +294,7 @@ xmlPythonFileReadRaw (void * context, char * buffer, int len) {
lenread = PyBytes_Size(ret);
data = PyBytes_AsString(ret);
#ifdef PyUnicode_Check
- } else if PyUnicode_Check (ret) {
+ } else if (PyUnicode_Check (ret)) {
#if PY_VERSION_HEX >= 0x03030000
Py_ssize_t size;
const char *tmp;
@@ -359,7 +359,7 @@ xmlPythonFileRead (void * context, char * buffer, int len) {
lenread = PyBytes_Size(ret);
data = PyBytes_AsString(ret);
#ifdef PyUnicode_Check
- } else if PyUnicode_Check (ret) {
+ } else if (PyUnicode_Check (ret)) {
#if PY_VERSION_HEX >= 0x03030000
Py_ssize_t size;
const char *tmp;
diff --git a/python/types.c b/python/types.c
index c2bafeb1..ed284ec7 100644
--- a/python/types.c
+++ b/python/types.c
@@ -602,16 +602,16 @@ libxml_xmlXPathObjectPtrConvert(PyObject *obj)
if (obj == NULL) {
return (NULL);
}
- if PyFloat_Check (obj) {
+ if (PyFloat_Check (obj)) {
ret = xmlXPathNewFloat((double) PyFloat_AS_DOUBLE(obj));
- } else if PyLong_Check(obj) {
+ } else if (PyLong_Check(obj)) {
#ifdef PyLong_AS_LONG
ret = xmlXPathNewFloat((double) PyLong_AS_LONG(obj));
#else
ret = xmlXPathNewFloat((double) PyInt_AS_LONG(obj));
#endif
#ifdef PyBool_Check
- } else if PyBool_Check (obj) {
+ } else if (PyBool_Check (obj)) {
if (obj == Py_True) {
ret = xmlXPathNewBoolean(1);
@@ -620,14 +620,14 @@ libxml_xmlXPathObjectPtrConvert(PyObject *obj)
ret = xmlXPathNewBoolean(0);
}
#endif
- } else if PyBytes_Check (obj) {
+ } else if (PyBytes_Check (obj)) {
xmlChar *str;
str = xmlStrndup((const xmlChar *) PyBytes_AS_STRING(obj),
PyBytes_GET_SIZE(obj));
ret = xmlXPathWrapString(str);
#ifdef PyUnicode_Check
- } else if PyUnicode_Check (obj) {
+ } else if (PyUnicode_Check (obj)) {
#if PY_VERSION_HEX >= 0x03030000
xmlChar *str;
const char *tmp;
@@ -650,7 +650,7 @@ libxml_xmlXPathObjectPtrConvert(PyObject *obj)
ret = xmlXPathWrapString(str);
#endif
#endif
- } else if PyList_Check (obj) {
+ } else if (PyList_Check (obj)) {
int i;
PyObject *node;
xmlNodePtr cur;
--
GitLab

View file

@@ -184,14 +184,17 @@ (define-public libebml
(define-public libxml2
(package
(name "libxml2")
(version "2.9.10")
(version "2.9.12")
(source (origin
(method url-fetch)
(uri (string-append "ftp://xmlsoft.org/libxml2/libxml2-"
version ".tar.gz"))
(sha256
(base32
"07xynh8hcxb2yb1fs051xrgszjvj37wnxvxgsj10rzmqzy9y3zma"))))
"14hxwzmf5xqppx77z7i0ni9lpzg1a84dqpf8j8l1fvy570g6imn8"))
(patches (search-patches "libxml2-parent-pointers.patch"
"libxml2-terminating-newline.patch"
"libxml2-xpath-recursion-limit.patch"))))
(build-system gnu-build-system)
(outputs '("out" "static" "doc"))
(arguments
@@ -217,8 +220,7 @@ (define-public libxml2
;; file such that Libtool does the right thing when both
;; the shared and static variants are available.
(substitute* (string-append src "/lib/libxml2.la")
(("^old_library='libxml2.a'") "old_library=''"))
#t))))))
(("^old_library='libxml2.a'") "old_library=''"))))))))
(home-page "http://www.xmlsoft.org/")
(synopsis "C parser for XML")
(inputs `(("xz" ,xz)))
@@ -374,8 +376,7 @@ (define-public python-libxml2
(source (origin
(inherit (package-source libxml2))
(patches
(append (search-patches "python-libxml2-python39-compat.patch"
"python-libxml2-utf8.patch")
(append (search-patches "python-libxml2-utf8.patch")
(origin-patches (package-source libxml2))))))
(build-system python-build-system)
(outputs '("out"))
@@ -397,8 +398,7 @@ (define-public python-libxml2
(format #f "ROOT = r'~a'" libxml2))
;; For 'iconv.h'.
(("/opt/include")
(string-append glibc "/include"))))
#t)))))
(string-append glibc "/include")))))))))
(inputs `(("libxml2" ,libxml2)))
(synopsis "Python bindings for the libxml2 library")))
@@ -2603,7 +2603,14 @@ (define-public python-lxml
(method url-fetch)
(uri (pypi-uri "lxml" version))
(sha256
(base32 "0s14r1w2x9sdlcsw8mxiqgw4rz5zs5lpqpxrfyn4a1mkndqqbdrr"))))
(base32 "0s14r1w2x9sdlcsw8mxiqgw4rz5zs5lpqpxrfyn4a1mkndqqbdrr"))
;; Adapt a test to libxml2 2.9.12, taken from this commit:
;; https://github.com/lxml/lxml/commit/852ed1092bd80b6b9a51db24371047e
(modules '((guix build utils)))
(snippet
'(substitute* "src/lxml/tests/test_etree.py"
(("self\\.assertEqual\\(\\{'hha': None\\}, el\\.nsmap\\)")
"self.assertEqual({}, el.nsmap)")))))
(build-system python-build-system)
(arguments
`(#:phases (modify-phases %standard-phases