diff options
author | Eric van Gyzen <vangyzen@FreeBSD.org> | 2022-03-02 20:00:38 +0000 |
---|---|---|
committer | Eric van Gyzen <vangyzen@FreeBSD.org> | 2022-03-02 20:00:38 +0000 |
commit | d1b143ee9a5c249312ffa20988d3d91830fab7eb (patch) | |
tree | cc9aa8d5370df96926e196ea711b819e8cb132b4 | |
parent | f83ac37f1e664a1af0d775ac6b2095f1cf049abd (diff) |
Vendor import of expat 2.4.6vendor/expat/2.4.6
Sponsored by: Dell EMC Isilon
-rw-r--r-- | Changes | 95 | ||||
-rw-r--r-- | Makefile.in | 1 | ||||
-rw-r--r-- | README.md | 2 | ||||
-rw-r--r-- | configure.ac | 12 | ||||
-rw-r--r-- | doc/Makefile.am | 3 | ||||
-rw-r--r-- | doc/Makefile.in | 4 | ||||
-rw-r--r-- | doc/reference.html | 2 | ||||
-rw-r--r-- | doc/xmlwf.1 | 2 | ||||
-rw-r--r-- | doc/xmlwf.xml | 4 | ||||
-rw-r--r-- | examples/Makefile.in | 1 | ||||
-rw-r--r-- | examples/elements.c | 2 | ||||
-rw-r--r-- | examples/outline.c | 2 | ||||
-rw-r--r-- | lib/Makefile.in | 1 | ||||
-rw-r--r-- | lib/expat.h | 2 | ||||
-rw-r--r-- | lib/xmlparse.c | 195 | ||||
-rw-r--r-- | lib/xmlrole.c | 2 | ||||
-rw-r--r-- | lib/xmltok.c | 9 | ||||
-rw-r--r-- | lib/xmltok_impl.c | 20 | ||||
-rw-r--r-- | tests/Makefile.in | 1 | ||||
-rw-r--r-- | tests/benchmark/Makefile.in | 1 | ||||
-rw-r--r-- | tests/benchmark/benchmark.c | 2 | ||||
-rw-r--r-- | tests/runtests.c | 251 | ||||
-rw-r--r-- | xmlwf/Makefile.in | 1 | ||||
-rw-r--r-- | xmlwf/xmlfile.c | 2 | ||||
-rw-r--r-- | xmlwf/xmlwf.c | 8 |
25 files changed, 529 insertions, 96 deletions
@@ -2,6 +2,101 @@ NOTE: We are looking for help with a few things: https://github.com/libexpat/libexpat/labels/help%20wanted If you can help, please get in touch. Thanks! +Release 2.4.6 Sun February 20 2022 + Bug fixes: + #566 Fix a regression introduced by the fix for CVE-2022-25313 + in release 2.4.5 that affects applications that (1) + call function XML_SetElementDeclHandler and (2) are + parsing XML that contains nested element declarations + (e.g. "<!ELEMENT junk ((bar|foo|xyz+), zebra*)>"). + + Other changes: + #567 #568 Version info bumped from 9:5:8 to 9:6:8; + see https://verbump.de/ for what these numbers do + + Special thanks to: + Matt Sergeant + Samanta Navarro + Sergei Trofimovich + and + NixOS + Perl XML::Parser + +Release 2.4.5 Fri February 18 2022 + Security fixes: + #562 CVE-2022-25235 -- Passing malformed 2- and 3-byte UTF-8 + sequences (e.g. from start tag names) to the XML + processing application on top of Expat can cause + arbitrary damage (e.g. code execution) depending + on how invalid UTF-8 is handled inside the XML + processor; validation was not their job but Expat's. + Exploits with code execution are known to exist. + #561 CVE-2022-25236 -- Passing (one or more) namespace separator + characters in "xmlns[:prefix]" attribute values + made Expat send malformed tag names to the XML + processor on top of Expat which can cause + arbitrary damage (e.g. code execution) depending + on such unexpectable cases are handled inside the XML + processor; validation was not their job but Expat's. + Exploits with code execution are known to exist. + #558 CVE-2022-25313 -- Fix stack exhaustion in doctype parsing + that could be triggered by e.g. a 2 megabytes + file with a large number of opening braces. + Expected impact is denial of service or potentially + arbitrary code execution. + #560 CVE-2022-25314 -- Fix integer overflow in function copyString; + only affects the encoding name parameter at parser creation + time which is often hardcoded (rather than user input), + takes a value in the gigabytes to trigger, and a 64-bit + machine. Expected impact is denial of service. + #559 CVE-2022-25315 -- Fix integer overflow in function storeRawNames; + needs input in the gigabytes and a 64-bit machine. + Expected impact is denial of service or potentially + arbitrary code execution. + + Other changes: + #557 #564 Version info bumped from 9:4:8 to 9:5:8; + see https://verbump.de/ for what these numbers do + + Special thanks to: + Ivan Fratric + Samanta Navarro + and + Google Project Zero + JetBrains + +Release 2.4.4 Sun January 30 2022 + Security fixes: + #550 CVE-2022-23852 -- Fix signed integer overflow + (undefined behavior) in function XML_GetBuffer + (that is also called by function XML_Parse internally) + for when XML_CONTEXT_BYTES is defined to >0 (which is both + common and default). + Impact is denial of service or more. + #551 CVE-2022-23990 -- Fix unsigned integer overflow in function + doProlog triggered by large content in element type + declarations when there is an element declaration handler + present (from a prior call to XML_SetElementDeclHandler). + Impact is denial of service or more. + + Bug fixes: + #544 #545 xmlwf: Fix a memory leak on output file opening error + + Other changes: + #546 Autotools: Fix broken CMake support under Cygwin + #554 Windows: Add missing files to the installer to fix + compilation with CMake from installed sources + #552 #554 Version info bumped from 9:3:8 to 9:4:8; + see https://verbump.de/ for what these numbers do + + Special thanks to: + Carlo Bramini + hwt0415 + Roland Illig + Samanta Navarro + and + Clang LeakSan and the Clang team + Release 2.4.3 Sun January 16 2022 Security fixes: #531 #534 CVE-2021-45960 -- Fix issues with left shifts by >=29 places diff --git a/Makefile.in b/Makefile.in index 34f900a61630..7c6551fca2cf 100644 --- a/Makefile.in +++ b/Makefile.in @@ -306,6 +306,7 @@ AWK = @AWK@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CMAKE_SHARED_LIBRARY_PREFIX = @CMAKE_SHARED_LIBRARY_PREFIX@ CPPFLAGS = @CPPFLAGS@ CSCOPE = @CSCOPE@ CTAGS = @CTAGS@ diff --git a/README.md b/README.md index 6fdd6148714b..959c4a6e94a7 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ [](https://github.com/libexpat/libexpat/releases) -# Expat, Release 2.4.3 +# Expat, Release 2.4.6 This is Expat, a C library for parsing XML, started by [James Clark](https://en.wikipedia.org/wiki/James_Clark_%28programmer%29) in 1997. diff --git a/configure.ac b/configure.ac index c285a2b60ed7..5175487bb4b3 100644 --- a/configure.ac +++ b/configure.ac @@ -82,7 +82,7 @@ dnl If the API changes incompatibly set LIBAGE back to 0 dnl LIBCURRENT=9 # sync -LIBREVISION=3 # with +LIBREVISION=6 # with LIBAGE=8 # CMakeLists.txt! AC_CONFIG_HEADERS([expat_config.h]) @@ -395,9 +395,17 @@ AC_SUBST([AM_CFLAGS]) AC_SUBST([AM_CXXFLAGS]) AC_SUBST([AM_LDFLAGS]) +dnl Emulate the use of CMAKE_SHARED_LIBRARY_PREFIX under CMake +AC_MSG_CHECKING([for shared library name prefix]) +AS_CASE("${host_os}", + [cygwin*], [CMAKE_SHARED_LIBRARY_PREFIX=cyg], + [CMAKE_SHARED_LIBRARY_PREFIX=lib]) +AC_MSG_RESULT([${CMAKE_SHARED_LIBRARY_PREFIX}]) +AC_SUBST([CMAKE_SHARED_LIBRARY_PREFIX]) + AS_CASE("${host_os}", [darwin*], [CMAKE_NOCONFIG_SOURCE=cmake/autotools/expat-noconfig__macos.cmake.in], - [mingw*], [CMAKE_NOCONFIG_SOURCE=cmake/autotools/expat-noconfig__windows.cmake.in], + [mingw*|cygwin*], [CMAKE_NOCONFIG_SOURCE=cmake/autotools/expat-noconfig__windows.cmake.in], [CMAKE_NOCONFIG_SOURCE=cmake/autotools/expat-noconfig__linux.cmake.in]) AC_CONFIG_FILES([Makefile] [expat.pc] diff --git a/doc/Makefile.am b/doc/Makefile.am index 16987e8dec40..c3a3ce59c1b9 100644 --- a/doc/Makefile.am +++ b/doc/Makefile.am @@ -6,7 +6,7 @@ # \___/_/\_\ .__/ \__,_|\__| # |_| XML parser # -# Copyright (c) 2017-2021 Sebastian Pipping <sebastian@pipping.org> +# Copyright (c) 2017-2022 Sebastian Pipping <sebastian@pipping.org> # Copyright (c) 2017 Stephen Groat <stephen@groat.us> # Copyright (c) 2017 Joe Orton <jorton@redhat.com> # Licensed under the MIT license: @@ -57,5 +57,4 @@ EXTRA_DIST = \ ok.min.css \ reference.html \ style.css \ - valid-xhtml10.png \ xmlwf.xml diff --git a/doc/Makefile.in b/doc/Makefile.in index c48834052935..9c7d76da2ce0 100644 --- a/doc/Makefile.in +++ b/doc/Makefile.in @@ -22,7 +22,7 @@ # \___/_/\_\ .__/ \__,_|\__| # |_| XML parser # -# Copyright (c) 2017-2021 Sebastian Pipping <sebastian@pipping.org> +# Copyright (c) 2017-2022 Sebastian Pipping <sebastian@pipping.org> # Copyright (c) 2017 Stephen Groat <stephen@groat.us> # Copyright (c) 2017 Joe Orton <jorton@redhat.com> # Licensed under the MIT license: @@ -209,6 +209,7 @@ AWK = @AWK@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CMAKE_SHARED_LIBRARY_PREFIX = @CMAKE_SHARED_LIBRARY_PREFIX@ CPPFLAGS = @CPPFLAGS@ CSCOPE = @CSCOPE@ CTAGS = @CTAGS@ @@ -344,7 +345,6 @@ EXTRA_DIST = \ ok.min.css \ reference.html \ style.css \ - valid-xhtml10.png \ xmlwf.xml all: all-am diff --git a/doc/reference.html b/doc/reference.html index 1629413e541e..26db5a635479 100644 --- a/doc/reference.html +++ b/doc/reference.html @@ -49,7 +49,7 @@ <div> <h1> The Expat XML Parser - <small>Release 2.4.3</small> + <small>Release 2.4.6</small> </h1> </div> <div class="content"> diff --git a/doc/xmlwf.1 b/doc/xmlwf.1 index f76353173f62..f931d63d4e1d 100644 --- a/doc/xmlwf.1 +++ b/doc/xmlwf.1 @@ -5,7 +5,7 @@ \\$2 \(la\\$1\(ra\\$3 .. .if \n(.g .mso www.tmac -.TH XMLWF 1 "January 16, 2022" "" "" +.TH XMLWF 1 "February 20, 2022" "" "" .SH NAME xmlwf \- Determines if an XML document is well-formed .SH SYNOPSIS diff --git a/doc/xmlwf.xml b/doc/xmlwf.xml index 804fc5620eda..79ed58569ea9 100644 --- a/doc/xmlwf.xml +++ b/doc/xmlwf.xml @@ -21,8 +21,8 @@ "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd" [ <!ENTITY dhfirstname "<firstname>Scott</firstname>"> <!ENTITY dhsurname "<surname>Bronson</surname>"> - <!-- Please adjust the date whenever revising the manpage. --> - <!ENTITY dhdate "<date>January 16, 2022</date>"> + <!ENTITY dhdate "<date>February 20, 2022</date>"> + <!-- Please adjust this^^ date whenever cutting a new release. --> <!ENTITY dhsection "<manvolnum>1</manvolnum>"> <!ENTITY dhemail "<email>bronson@rinspin.com</email>"> <!ENTITY dhusername "Scott Bronson"> diff --git a/examples/Makefile.in b/examples/Makefile.in index d476e53526bd..8528d439290b 100644 --- a/examples/Makefile.in +++ b/examples/Makefile.in @@ -230,6 +230,7 @@ AWK = @AWK@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CMAKE_SHARED_LIBRARY_PREFIX = @CMAKE_SHARED_LIBRARY_PREFIX@ CPPFLAGS = @CPPFLAGS@ CSCOPE = @CSCOPE@ CTAGS = @CTAGS@ diff --git a/examples/elements.c b/examples/elements.c index 2b6645610771..481d44472686 100644 --- a/examples/elements.c +++ b/examples/elements.c @@ -13,7 +13,7 @@ Copyright (c) 1997-2000 Thai Open Source Software Center Ltd Copyright (c) 2001-2003 Fred L. Drake, Jr. <fdrake@users.sourceforge.net> Copyright (c) 2004-2006 Karl Waclawek <karl@waclawek.net> - Copyright (c) 2005-2007 Steven Solie <ssolie@users.sourceforge.net> + Copyright (c) 2005-2007 Steven Solie <steven@solie.ca> Copyright (c) 2016-2019 Sebastian Pipping <sebastian@pipping.org> Copyright (c) 2017 Rhodri James <rhodri@wildebeest.org.uk> Copyright (c) 2019 Zhongyuan Zhou <zhouzhongyuan@huawei.com> diff --git a/examples/outline.c b/examples/outline.c index 4ed041febb37..936f0e09053f 100644 --- a/examples/outline.c +++ b/examples/outline.c @@ -10,7 +10,7 @@ Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net> Copyright (c) 2001-2003 Fred L. Drake, Jr. <fdrake@users.sourceforge.net> - Copyright (c) 2005-2007 Steven Solie <ssolie@users.sourceforge.net> + Copyright (c) 2005-2007 Steven Solie <steven@solie.ca> Copyright (c) 2005-2006 Karl Waclawek <karl@waclawek.net> Copyright (c) 2016-2019 Sebastian Pipping <sebastian@pipping.org> Copyright (c) 2017 Rhodri James <rhodri@wildebeest.org.uk> diff --git a/lib/Makefile.in b/lib/Makefile.in index d4069ebe983d..3581b6bf66b0 100644 --- a/lib/Makefile.in +++ b/lib/Makefile.in @@ -268,6 +268,7 @@ AWK = @AWK@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CMAKE_SHARED_LIBRARY_PREFIX = @CMAKE_SHARED_LIBRARY_PREFIX@ CPPFLAGS = @CPPFLAGS@ CSCOPE = @CSCOPE@ CTAGS = @CTAGS@ diff --git a/lib/expat.h b/lib/expat.h index 0f021e25def1..46a0e1bcd22d 100644 --- a/lib/expat.h +++ b/lib/expat.h @@ -1041,7 +1041,7 @@ XML_SetBillionLaughsAttackProtectionActivationThreshold( */ #define XML_MAJOR_VERSION 2 #define XML_MINOR_VERSION 4 -#define XML_MICRO_VERSION 3 +#define XML_MICRO_VERSION 6 #ifdef __cplusplus } diff --git a/lib/xmlparse.c b/lib/xmlparse.c index b2f5fc6bee77..7db28d07acbc 100644 --- a/lib/xmlparse.c +++ b/lib/xmlparse.c @@ -1,4 +1,4 @@ -/* 9ca2a2fedc35bcb13ba9a134ba5e173020bc2ff5f5a311abf742cec7da1ff26a (2.4.3+) +/* a30d2613dcfdef81475a9d1a349134d2d42722172fdaa7d5bb12ed2aa74b9596 (2.4.6+) __ __ _ ___\ \/ /_ __ __ _| |_ / _ \\ /| '_ \ / _` | __| @@ -11,7 +11,7 @@ Copyright (c) 2000-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net> Copyright (c) 2001-2002 Greg Stein <gstein@users.sourceforge.net> Copyright (c) 2002-2016 Karl Waclawek <karl@waclawek.net> - Copyright (c) 2005-2009 Steven Solie <ssolie@users.sourceforge.net> + Copyright (c) 2005-2009 Steven Solie <steven@solie.ca> Copyright (c) 2016 Eric Rahm <erahm@mozilla.com> Copyright (c) 2016-2022 Sebastian Pipping <sebastian@pipping.org> Copyright (c) 2016 Gaurav <g.gupta@samsung.com> @@ -33,6 +33,7 @@ Copyright (c) 2019-2020 Ben Wagner <bungeman@chromium.org> Copyright (c) 2019 Vadim Zeitlin <vadim@zeitlins.org> Copyright (c) 2021 Dong-hee Na <donghee.na@python.org> + Copyright (c) 2022 Samanta Navarro <ferivoz@riseup.net> Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining @@ -717,8 +718,7 @@ XML_ParserCreate(const XML_Char *encodingName) { XML_Parser XMLCALL XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep) { - XML_Char tmp[2]; - *tmp = nsSep; + XML_Char tmp[2] = {nsSep, 0}; return XML_ParserCreate_MM(encodingName, NULL, tmp); } @@ -974,7 +974,7 @@ parserCreate(const XML_Char *encodingName, if (memsuite) { XML_Memory_Handling_Suite *mtemp; - parser = (XML_Parser)memsuite->malloc_fcn(sizeof(struct XML_ParserStruct)); + parser = memsuite->malloc_fcn(sizeof(struct XML_ParserStruct)); if (parser != NULL) { mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem); mtemp->malloc_fcn = memsuite->malloc_fcn; @@ -1343,8 +1343,7 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context, would be otherwise. */ if (parser->m_ns) { - XML_Char tmp[2]; - *tmp = parser->m_namespaceSeparator; + XML_Char tmp[2] = {parser->m_namespaceSeparator, 0}; parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd); } else { parser = parserCreate(encodingName, &parser->m_mem, NULL, newDtd); @@ -2067,6 +2066,11 @@ XML_GetBuffer(XML_Parser parser, int len) { keep = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer); if (keep > XML_CONTEXT_BYTES) keep = XML_CONTEXT_BYTES; + /* Detect and prevent integer overflow */ + if (keep > INT_MAX - neededSize) { + parser->m_errorCode = XML_ERROR_NO_MEMORY; + return NULL; + } neededSize += keep; #endif /* defined XML_CONTEXT_BYTES */ if (neededSize @@ -2557,6 +2561,7 @@ storeRawNames(XML_Parser parser) { while (tag) { int bufSize; int nameLen = sizeof(XML_Char) * (tag->name.strLen + 1); + size_t rawNameLen; char *rawNameBuf = tag->buf + nameLen; /* Stop if already stored. Since m_tagStack is a stack, we can stop at the first entry that has already been copied; everything @@ -2568,7 +2573,11 @@ storeRawNames(XML_Parser parser) { /* For re-use purposes we need to ensure that the size of tag->buf is a multiple of sizeof(XML_Char). */ - bufSize = nameLen + ROUND_UP(tag->rawNameLength, sizeof(XML_Char)); + rawNameLen = ROUND_UP(tag->rawNameLength, sizeof(XML_Char)); + /* Detect and prevent integer overflow. */ + if (rawNameLen > (size_t)INT_MAX - nameLen) + return XML_FALSE; + bufSize = nameLen + (int)rawNameLen; if (bufSize > tag->bufEnd - tag->buf) { char *temp = (char *)REALLOC(parser, tag->buf, bufSize); if (temp == NULL) @@ -3750,6 +3759,17 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, if (! mustBeXML && isXMLNS && (len > xmlnsLen || uri[len] != xmlnsNamespace[len])) isXMLNS = XML_FALSE; + + // NOTE: While Expat does not validate namespace URIs against RFC 3986, + // we have to at least make sure that the XML processor on top of + // Expat (that is splitting tag names by namespace separator into + // 2- or 3-tuples (uri-local or uri-local-prefix)) cannot be confused + // by an attacker putting additional namespace separator characters + // into namespace declarations. That would be ambiguous and not to + // be expected. + if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)) { + return XML_ERROR_SYNTAX; + } } isXML = isXML && len == xmlLen; isXMLNS = isXMLNS && len == xmlnsLen; @@ -4092,7 +4112,7 @@ initializeEncoding(XML_Parser parser) { const char *s; #ifdef XML_UNICODE char encodingBuf[128]; - /* See comments abount `protoclEncodingName` in parserInit() */ + /* See comments about `protocolEncodingName` in parserInit() */ if (! parser->m_protocolEncodingName) s = NULL; else { @@ -5367,7 +5387,7 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, if (dtd->in_eldecl) { ELEMENT_TYPE *el; const XML_Char *name; - int nameLen; + size_t nameLen; const char *nxt = (quant == XML_CQUANT_NONE ? next : next - enc->minBytesPerChar); int myindex = nextScaffoldPart(parser); @@ -5383,7 +5403,13 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, nameLen = 0; for (; name[nameLen++];) ; - dtd->contentStringLen += nameLen; + + /* Detect and prevent integer overflow */ + if (nameLen > UINT_MAX - dtd->contentStringLen) { + return XML_ERROR_NO_MEMORY; + } + + dtd->contentStringLen += (unsigned)nameLen; if (parser->m_elementDeclHandler) handleDefault = XML_FALSE; } @@ -6536,7 +6562,7 @@ normalizePublicId(XML_Char *publicId) { static DTD * dtdCreate(const XML_Memory_Handling_Suite *ms) { - DTD *p = (DTD *)ms->malloc_fcn(sizeof(DTD)); + DTD *p = ms->malloc_fcn(sizeof(DTD)); if (p == NULL) return p; poolInit(&(p->pool), ms); @@ -6709,8 +6735,8 @@ dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd, if (! newE) return 0; if (oldE->nDefaultAtts) { - newE->defaultAtts = (DEFAULT_ATTRIBUTE *)ms->malloc_fcn( - oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE)); + newE->defaultAtts + = ms->malloc_fcn(oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE)); if (! newE->defaultAtts) { return 0; } @@ -6872,7 +6898,7 @@ lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize) { /* table->size is a power of 2 */ table->size = (size_t)1 << INIT_POWER; tsize = table->size * sizeof(NAMED *); - table->v = (NAMED **)table->mem->malloc_fcn(tsize); + table->v = table->mem->malloc_fcn(tsize); if (! table->v) { table->size = 0; return NULL; @@ -6912,7 +6938,7 @@ lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize) { } size_t tsize = newSize * sizeof(NAMED *); - NAMED **newV = (NAMED **)table->mem->malloc_fcn(tsize); + NAMED **newV = table->mem->malloc_fcn(tsize); if (! newV) return NULL; memset(newV, 0, tsize); @@ -6941,7 +6967,7 @@ lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize) { } } } - table->v[i] = (NAMED *)table->mem->malloc_fcn(createSize); + table->v[i] = table->mem->malloc_fcn(createSize); if (! table->v[i]) return NULL; memset(table->v[i], 0, createSize); @@ -7229,7 +7255,7 @@ poolGrow(STRING_POOL *pool) { if (bytesToAllocate == 0) return XML_FALSE; - tem = (BLOCK *)pool->mem->malloc_fcn(bytesToAllocate); + tem = pool->mem->malloc_fcn(bytesToAllocate); if (! tem) return XML_FALSE; tem->size = blockSize; @@ -7305,44 +7331,15 @@ nextScaffoldPart(XML_Parser parser) { return next; } -static void -build_node(XML_Parser parser, int src_node, XML_Content *dest, - XML_Content **contpos, XML_Char **strpos) { - DTD *const dtd = parser->m_dtd; /* save one level of indirection */ - dest->type = dtd->scaffold[src_node].type; - dest->quant = dtd->scaffold[src_node].quant; - if (dest->type == XML_CTYPE_NAME) { - const XML_Char *src; - dest->name = *strpos; - src = dtd->scaffold[src_node].name; - for (;;) { - *(*strpos)++ = *src; - if (! *src) - break; - src++; - } - dest->numchildren = 0; - dest->children = NULL; - } else { - unsigned int i; - int cn; - dest->numchildren = dtd->scaffold[src_node].childcnt; - dest->children = *contpos; - *contpos += dest->numchildren; - for (i = 0, cn = dtd->scaffold[src_node].firstchild; i < dest->numchildren; - i++, cn = dtd->scaffold[cn].nextsib) { - build_node(parser, cn, &(dest->children[i]), contpos, strpos); - } - dest->name = NULL; - } -} - static XML_Content * build_model(XML_Parser parser) { + /* Function build_model transforms the existing parser->m_dtd->scaffold + * array of CONTENT_SCAFFOLD tree nodes into a new array of + * XML_Content tree nodes followed by a gapless list of zero-terminated + * strings. */ DTD *const dtd = parser->m_dtd; /* save one level of indirection */ XML_Content *ret; - XML_Content *cpos; - XML_Char *str; + XML_Char *str; /* the current string writing location */ /* Detect and prevent integer overflow. * The preprocessor guard addresses the "always false" warning @@ -7368,10 +7365,96 @@ build_model(XML_Parser parser) { if (! ret) return NULL; - str = (XML_Char *)(&ret[dtd->scaffCount]); - cpos = &ret[1]; + /* What follows is an iterative implementation (of what was previously done + * recursively in a dedicated function called "build_node". The old recursive + * build_node could be forced into stack exhaustion from input as small as a + * few megabyte, and so that was a security issue. Hence, a function call + * stack is avoided now by resolving recursion.) + * + * The iterative approach works as follows: + * + * - We have two writing pointers, both walking up the result array; one does + * the work, the other creates "jobs" for its colleague to do, and leads + * the way: + * + * - The faster one, pointer jobDest, always leads and writes "what job + * to do" by the other, once they reach that place in the + * array: leader "jobDest" stores the source node array index (relative + * to array dtd->scaffold) in field "numchildren". + * + * - The slower one, pointer dest, looks at the value stored in the + * "numchildren" field (which actually holds a source node array index + * at that time) and puts the real data from dtd->scaffold in. + * + * - Before the loop starts, jobDest writes source array index 0 + * (where the root node is located) so that dest will have something to do + * when it starts operation. + * + * - Whenever nodes with children are encountered, jobDest appends + * them as new jobs, in order. As a result, tree node siblings are + * adjacent in the resulting array, for example: + * + * [0] root, has two children + * [1] first child of 0, has three children + * [3] first child of 1, does not have children + * [4] second child of 1, does not have children + * [5] third child of 1, does not have children + * [2] second child of 0, does not have children + * + * Or (the same data) presented in flat array view: + * + * [0] root, has two children + * + * [1] first child of 0, has three children + * [2] second child of 0, does not have children + * + * [3] first child of 1, does not have children + * [4] second child of 1, does not have children + * [5] third child of 1, does not have children + * + * - The algorithm repeats until all target array indices have been processed. + */ + XML_Content *dest = ret; /* tree node writing location, moves upwards */ + XML_Content *const destLimit = &ret[dtd->scaffCount]; + XML_Content *jobDest = ret; /* next free writing location in target array */ + str = (XML_Char *)&ret[dtd->scaffCount]; + + /* Add the starting job, the root node (index 0) of the source tree */ + (jobDest++)->numchildren = 0; + + for (; dest < destLimit; dest++) { + /* Retrieve source tree array index from job storage */ + const int src_node = (int)dest->numchildren; + + /* Convert item */ + dest->type = dtd->scaffold[src_node].type; + dest->quant = dtd->scaffold[src_node].quant; + if (dest->type == XML_CTYPE_NAME) { + const XML_Char *src; + dest->name = str; + src = dtd->scaffold[src_node].name; + for (;;) { + *str++ = *src; + if (! *src) + break; + src++; + } + dest->numchildren = 0; + dest->children = NULL; + } else { + unsigned int i; + int cn; + dest->name = NULL; + dest->numchildren = dtd->scaffold[src_node].childcnt; + dest->children = jobDest; + + /* Append scaffold indices of children to array */ + for (i = 0, cn = dtd->scaffold[src_node].firstchild; + i < dest->numchildren; i++, cn = dtd->scaffold[cn].nextsib) + (jobDest++)->numchildren = (unsigned int)cn; + } + } - build_node(parser, 0, ret, &cpos, &str); return ret; } @@ -7400,7 +7483,7 @@ getElementType(XML_Parser parser, const ENCODING *enc, const char *ptr, static XML_Char * copyString(const XML_Char *s, const XML_Memory_Handling_Suite *memsuite) { - int charsRequired = 0; + size_t charsRequired = 0; XML_Char *result; /* First determine how long the string is */ diff --git a/lib/xmlrole.c b/lib/xmlrole.c index 77746ee42d10..3f0f5c150c62 100644 --- a/lib/xmlrole.c +++ b/lib/xmlrole.c @@ -11,7 +11,7 @@ Copyright (c) 2002 Greg Stein <gstein@users.sourceforge.net> Copyright (c) 2002-2006 Karl Waclawek <karl@waclawek.net> Copyright (c) 2002-2003 Fred L. Drake, Jr. <fdrake@users.sourceforge.net> - Copyright (c) 2005-2009 Steven Solie <ssolie@users.sourceforge.net> + Copyright (c) 2005-2009 Steven Solie <steven@solie.ca> Copyright (c) 2016-2021 Sebastian Pipping <sebastian@pipping.org> Copyright (c) 2017 Rhodri James <rhodri@wildebeest.org.uk> Copyright (c) 2019 David Loffredo <loffredo@steptools.com> diff --git a/lib/xmltok.c b/lib/xmltok.c index 502ca1adc33b..c659983b4008 100644 --- a/lib/xmltok.c +++ b/lib/xmltok.c @@ -11,8 +11,8 @@ Copyright (c) 2001-2003 Fred L. Drake, Jr. <fdrake@users.sourceforge.net> Copyright (c) 2002 Greg Stein <gstein@users.sourceforge.net> Copyright (c) 2002-2016 Karl Waclawek <karl@waclawek.net> - Copyright (c) 2005-2009 Steven Solie <ssolie@users.sourceforge.net> - Copyright (c) 2016-2021 Sebastian Pipping <sebastian@pipping.org> + Copyright (c) 2005-2009 Steven Solie <steven@solie.ca> + Copyright (c) 2016-2022 Sebastian Pipping <sebastian@pipping.org> Copyright (c) 2016 Pascal Cuoq <cuoq@trust-in-soft.com> Copyright (c) 2016 Don Lewis <truckman@apache.org> Copyright (c) 2017 Rhodri James <rhodri@wildebeest.org.uk> @@ -98,11 +98,6 @@ + ((((byte)[1]) & 3) << 1) + ((((byte)[2]) >> 5) & 1)] \ & (1u << (((byte)[2]) & 0x1F))) -#define UTF8_GET_NAMING(pages, p, n) \ - ((n) == 2 \ - ? UTF8_GET_NAMING2(pages, (const unsigned char *)(p)) \ - : ((n) == 3 ? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) : 0)) - /* Detection of invalid UTF-8 sequences is based on Table 3.1B of Unicode 3.2: http://www.unicode.org/unicode/reports/tr28/ with the additional restriction of not allowing the Unicode diff --git a/lib/xmltok_impl.c b/lib/xmltok_impl.c index 0430591b4263..4072b06497d1 100644 --- a/lib/xmltok_impl.c +++ b/lib/xmltok_impl.c @@ -10,7 +10,7 @@ Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net> Copyright (c) 2002 Fred L. Drake, Jr. <fdrake@users.sourceforge.net> Copyright (c) 2002-2016 Karl Waclawek <karl@waclawek.net> - Copyright (c) 2016-2021 Sebastian Pipping <sebastian@pipping.org> + Copyright (c) 2016-2022 Sebastian Pipping <sebastian@pipping.org> Copyright (c) 2017 Rhodri James <rhodri@wildebeest.org.uk> Copyright (c) 2018 Benjamin Peterson <benjamin@python.org> Copyright (c) 2018 Anton Maklakov <antmak.pub@gmail.com> @@ -69,7 +69,7 @@ case BT_LEAD##n: \ if (end - ptr < n) \ return XML_TOK_PARTIAL_CHAR; \ - if (! IS_NAME_CHAR(enc, ptr, n)) { \ + if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_NAME_CHAR(enc, ptr, n)) { \ *nextTokPtr = ptr; \ return XML_TOK_INVALID; \ } \ @@ -98,7 +98,7 @@ case BT_LEAD##n: \ if (end - ptr < n) \ return XML_TOK_PARTIAL_CHAR; \ - if (! IS_NMSTRT_CHAR(enc, ptr, n)) { \ + if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_NMSTRT_CHAR(enc, ptr, n)) { \ *nextTokPtr = ptr; \ return XML_TOK_INVALID; \ } \ @@ -1142,6 +1142,10 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, case BT_LEAD##n: \ if (end - ptr < n) \ return XML_TOK_PARTIAL_CHAR; \ + if (IS_INVALID_CHAR(enc, ptr, n)) { \ + *nextTokPtr = ptr; \ + return XML_TOK_INVALID; \ + } \ if (IS_NMSTRT_CHAR(enc, ptr, n)) { \ ptr += n; \ tok = XML_TOK_NAME; \ @@ -1270,7 +1274,7 @@ PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char *end, switch (BYTE_TYPE(enc, ptr)) { # define LEAD_CASE(n) \ case BT_LEAD##n: \ - ptr += n; \ + ptr += n; /* NOTE: The encoding has already been validated. */ \ break; LEAD_CASE(2) LEAD_CASE(3) @@ -1339,7 +1343,7 @@ PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *end, switch (BYTE_TYPE(enc, ptr)) { # define LEAD_CASE(n) \ case BT_LEAD##n: \ - ptr += n; \ + ptr += n; /* NOTE: The encoding has already been validated. */ \ break; LEAD_CASE(2) LEAD_CASE(3) @@ -1518,7 +1522,7 @@ PREFIX(getAtts)(const ENCODING *enc, const char *ptr, int attsMax, state = inName; \ } # define LEAD_CASE(n) \ - case BT_LEAD##n: \ + case BT_LEAD##n: /* NOTE: The encoding has already been validated. */ \ START_NAME ptr += (n - MINBPC(enc)); \ break; LEAD_CASE(2) @@ -1730,7 +1734,7 @@ PREFIX(nameLength)(const ENCODING *enc, const char *ptr) { switch (BYTE_TYPE(enc, ptr)) { # define LEAD_CASE(n) \ case BT_LEAD##n: \ - ptr += n; \ + ptr += n; /* NOTE: The encoding has already been validated. */ \ break; LEAD_CASE(2) LEAD_CASE(3) @@ -1775,7 +1779,7 @@ PREFIX(updatePosition)(const ENCODING *enc, const char *ptr, const char *end, switch (BYTE_TYPE(enc, ptr)) { # define LEAD_CASE(n) \ case BT_LEAD##n: \ - ptr += n; \ + ptr += n; /* NOTE: The encoding has already been validated. */ \ pos->columnNumber++; \ break; LEAD_CASE(2) diff --git a/tests/Makefile.in b/tests/Makefile.in index 1d382c34b5bb..024ddd98cf29 100644 --- a/tests/Makefile.in +++ b/tests/Makefile.in @@ -516,6 +516,7 @@ AWK = @AWK@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CMAKE_SHARED_LIBRARY_PREFIX = @CMAKE_SHARED_LIBRARY_PREFIX@ CPPFLAGS = @CPPFLAGS@ CSCOPE = @CSCOPE@ CTAGS = @CTAGS@ diff --git a/tests/benchmark/Makefile.in b/tests/benchmark/Makefile.in index 7bf8068b1942..64238f1da99a 100644 --- a/tests/benchmark/Makefile.in +++ b/tests/benchmark/Makefile.in @@ -227,6 +227,7 @@ AWK = @AWK@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CMAKE_SHARED_LIBRARY_PREFIX = @CMAKE_SHARED_LIBRARY_PREFIX@ CPPFLAGS = @CPPFLAGS@ CSCOPE = @CSCOPE@ CTAGS = @CTAGS@ diff --git a/tests/benchmark/benchmark.c b/tests/benchmark/benchmark.c index ba2bc6e1fa22..2c4eb78e3615 100644 --- a/tests/benchmark/benchmark.c +++ b/tests/benchmark/benchmark.c @@ -7,7 +7,7 @@ |_| XML parser Copyright (c) 2003-2006 Karl Waclawek <karl@waclawek.net> - Copyright (c) 2005-2007 Steven Solie <ssolie@users.sourceforge.net> + Copyright (c) 2005-2007 Steven Solie <steven@solie.ca> Copyright (c) 2017 Sebastian Pipping <sebastian@pipping.org> Copyright (c) 2017 Rhodri James <rhodri@wildebeest.org.uk> Licensed under the MIT license: diff --git a/tests/runtests.c b/tests/runtests.c index d2923ac1fd58..6d6f66909a11 100644 --- a/tests/runtests.c +++ b/tests/runtests.c @@ -8,7 +8,7 @@ Copyright (c) 2001-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net> Copyright (c) 2003 Greg Stein <gstein@users.sourceforge.net> - Copyright (c) 2005-2007 Steven Solie <ssolie@users.sourceforge.net> + Copyright (c) 2005-2007 Steven Solie <steven@solie.ca> Copyright (c) 2005-2012 Karl Waclawek <karl@waclawek.net> Copyright (c) 2016-2022 Sebastian Pipping <sebastian@pipping.org> Copyright (c) 2017-2018 Rhodri James <rhodri@wildebeest.org.uk> @@ -2664,6 +2664,82 @@ START_TEST(test_dtd_elements) { } END_TEST +static void XMLCALL +element_decl_check_model(void *userData, const XML_Char *name, + XML_Content *model) { + UNUSED_P(userData); + uint32_t errorFlags = 0; + + /* Expected model array structure is this: + * [0] (type 6, quant 0) + * [1] (type 5, quant 0) + * [3] (type 4, quant 0, name "bar") + * [4] (type 4, quant 0, name "foo") + * [5] (type 4, quant 3, name "xyz") + * [2] (type 4, quant 2, name "zebra") + */ + errorFlags |= ((xcstrcmp(name, XCS("junk")) == 0) ? 0 : (1u << 0)); + errorFlags |= ((model != NULL) ? 0 : (1u << 1)); + + errorFlags |= ((model[0].type == XML_CTYPE_SEQ) ? 0 : (1u << 2)); + errorFlags |= ((model[0].quant == XML_CQUANT_NONE) ? 0 : (1u << 3)); + errorFlags |= ((model[0].numchildren == 2) ? 0 : (1u << 4)); + errorFlags |= ((model[0].children == &model[1]) ? 0 : (1u << 5)); + errorFlags |= ((model[0].name == NULL) ? 0 : (1u << 6)); + + errorFlags |= ((model[1].type == XML_CTYPE_CHOICE) ? 0 : (1u << 7)); + errorFlags |= ((model[1].quant == XML_CQUANT_NONE) ? 0 : (1u << 8)); + errorFlags |= ((model[1].numchildren == 3) ? 0 : (1u << 9)); + errorFlags |= ((model[1].children == &model[3]) ? 0 : (1u << 10)); + errorFlags |= ((model[1].name == NULL) ? 0 : (1u << 11)); + + errorFlags |= ((model[2].type == XML_CTYPE_NAME) ? 0 : (1u << 12)); + errorFlags |= ((model[2].quant == XML_CQUANT_REP) ? 0 : (1u << 13)); + errorFlags |= ((model[2].numchildren == 0) ? 0 : (1u << 14)); + errorFlags |= ((model[2].children == NULL) ? 0 : (1u << 15)); + errorFlags |= ((xcstrcmp(model[2].name, XCS("zebra")) == 0) ? 0 : (1u << 16)); + + errorFlags |= ((model[3].type == XML_CTYPE_NAME) ? 0 : (1u << 17)); + errorFlags |= ((model[3].quant == XML_CQUANT_NONE) ? 0 : (1u << 18)); + errorFlags |= ((model[3].numchildren == 0) ? 0 : (1u << 19)); + errorFlags |= ((model[3].children == NULL) ? 0 : (1u << 20)); + errorFlags |= ((xcstrcmp(model[3].name, XCS("bar")) == 0) ? 0 : (1u << 21)); + + errorFlags |= ((model[4].type == XML_CTYPE_NAME) ? 0 : (1u << 22)); + errorFlags |= ((model[4].quant == XML_CQUANT_NONE) ? 0 : (1u << 23)); + errorFlags |= ((model[4].numchildren == 0) ? 0 : (1u << 24)); + errorFlags |= ((model[4].children == NULL) ? 0 : (1u << 25)); + errorFlags |= ((xcstrcmp(model[4].name, XCS("foo")) == 0) ? 0 : (1u << 26)); + + errorFlags |= ((model[5].type == XML_CTYPE_NAME) ? 0 : (1u << 27)); + errorFlags |= ((model[5].quant == XML_CQUANT_PLUS) ? 0 : (1u << 28)); + errorFlags |= ((model[5].numchildren == 0) ? 0 : (1u << 29)); + errorFlags |= ((model[5].children == NULL) ? 0 : (1u << 30)); + errorFlags |= ((xcstrcmp(model[5].name, XCS("xyz")) == 0) ? 0 : (1u << 31)); + + XML_SetUserData(g_parser, (void *)(uintptr_t)errorFlags); + XML_FreeContentModel(g_parser, model); +} + +START_TEST(test_dtd_elements_nesting) { + // Payload inspired by a test in Perl's XML::Parser + const char *text = "<!DOCTYPE foo [\n" + "<!ELEMENT junk ((bar|foo|xyz+), zebra*)>\n" + "]>\n" + "<foo/>"; + + XML_SetUserData(g_parser, (void *)(uintptr_t)-1); + + XML_SetElementDeclHandler(g_parser, element_decl_check_model); + if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) + == XML_STATUS_ERROR) + xml_failure(g_parser); + + if ((uint32_t)(uintptr_t)XML_GetUserData(g_parser) != 0) + fail("Element declaration model regression detected"); +} +END_TEST + /* Test foreign DTD handling */ START_TEST(test_set_foreign_dtd) { const char *text1 = "<?xml version='1.0' encoding='us-ascii'?>\n"; @@ -3847,6 +3923,30 @@ START_TEST(test_get_buffer_2) { } END_TEST +/* Test for signed integer overflow CVE-2022-23852 */ +#if defined(XML_CONTEXT_BYTES) +START_TEST(test_get_buffer_3_overflow) { + XML_Parser parser = XML_ParserCreate(NULL); + assert(parser != NULL); + + const char *const text = "\n"; + const int expectedKeepValue = (int)strlen(text); + + // After this call, variable "keep" in XML_GetBuffer will + // have value expectedKeepValue + if (XML_Parse(parser, text, (int)strlen(text), XML_FALSE /* isFinal */) + == XML_STATUS_ERROR) + xml_failure(parser); + + assert(expectedKeepValue > 0); + if (XML_GetBuffer(parser, INT_MAX - expectedKeepValue + 1) != NULL) + fail("enlarging buffer not failed"); + + XML_ParserFree(parser); +} +END_TEST +#endif // defined(XML_CONTEXT_BYTES) + /* Test position information macros */ START_TEST(test_byte_info_at_end) { const char *text = "<doc></doc>"; @@ -5974,6 +6074,105 @@ START_TEST(test_utf8_in_cdata_section_2) { } END_TEST +START_TEST(test_utf8_in_start_tags) { + struct test_case { + bool goodName; + bool goodNameStart; + const char *tagName; + }; + + // The idea with the tests below is this: + // We want to cover 1-, 2- and 3-byte sequences, 4-byte sequences + // go to isNever and are hence not a concern. + // + // We start with a character that is a valid name character + // (or even name-start character, see XML 1.0r4 spec) and then we flip + // single bits at places where (1) the result leaves the UTF-8 encoding space + // and (2) we stay in the same n-byte sequence family. + // + // The flipped bits are highlighted in angle brackets in comments, + // e.g. "[<1>011 1001]" means we had [0011 1001] but we now flipped + // the most significant bit to 1 to leave UTF-8 encoding space. + struct test_case cases[] = { + // 1-byte UTF-8: [0xxx xxxx] + {true, true, "\x3A"}, // [0011 1010] = ASCII colon ':' + {false, false, "\xBA"}, // [<1>011 1010] + {true, false, "\x39"}, // [0011 1001] = ASCII nine '9' + {false, false, "\xB9"}, // [<1>011 1001] + + // 2-byte UTF-8: [110x xxxx] [10xx xxxx] + {true, true, "\xDB\xA5"}, // [1101 1011] [1010 0101] = + // Arabic small waw U+06E5 + {false, false, "\x9B\xA5"}, // [1<0>01 1011] [1010 0101] + {false, false, "\xDB\x25"}, // [1101 1011] [<0>010 0101] + {false, false, "\xDB\xE5"}, // [1101 1011] [1<1>10 0101] + {true, false, "\xCC\x81"}, // [1100 1100] [1000 0001] = + // combining char U+0301 + {false, false, "\x8C\x81"}, // [1<0>00 1100] [1000 0001] + {false, false, "\xCC\x01"}, // [1100 1100] [<0>000 0001] + {false, false, "\xCC\xC1"}, // [1100 1100] [1<1>00 0001] + + // 3-byte UTF-8: [1110 xxxx] [10xx xxxx] [10xxxxxx] + {true, true, "\xE0\xA4\x85"}, // [1110 0000] [1010 0100] [1000 0101] = + // Devanagari Letter A U+0905 + {false, false, "\xA0\xA4\x85"}, // [1<0>10 0000] [1010 0100] [1000 0101] + {false, false, "\xE0\x24\x85"}, // [1110 0000] [<0>010 0100] [1000 0101] + {false, false, "\xE0\xE4\x85"}, // [1110 0000] [1<1>10 0100] [1000 0101] + {false, false, "\xE0\xA4\x05"}, // [1110 0000] [1010 0100] [<0>000 0101] + {false, false, "\xE0\xA4\xC5"}, // [1110 0000] [1010 0100] [1<1>00 0101] + {true, false, "\xE0\xA4\x81"}, // [1110 0000] [1010 0100] [1000 0001] = + // combining char U+0901 + {false, false, "\xA0\xA4\x81"}, // [1<0>10 0000] [1010 0100] [1000 0001] + {false, false, "\xE0\x24\x81"}, // [1110 0000] [<0>010 0100] [1000 0001] + {false, false, "\xE0\xE4\x81"}, // [1110 0000] [1<1>10 0100] [1000 0001] + {false, false, "\xE0\xA4\x01"}, // [1110 0000] [1010 0100] [<0>000 0001] + {false, false, "\xE0\xA4\xC1"}, // [1110 0000] [1010 0100] [1<1>00 0001] + }; + const bool atNameStart[] = {true, false}; + + size_t i = 0; + char doc[1024]; + size_t failCount = 0; + + for (; i < sizeof(cases) / sizeof(cases[0]); i++) { + size_t j = 0; + for (; j < sizeof(atNameStart) / sizeof(atNameStart[0]); j++) { + const bool expectedSuccess + = atNameStart[j] ? cases[i].goodNameStart : cases[i].goodName; + sprintf(doc, "<%s%s><!--", atNameStart[j] ? "" : "a", cases[i].tagName); + XML_Parser parser = XML_ParserCreate(NULL); + + const enum XML_Status status + = XML_Parse(parser, doc, (int)strlen(doc), /*isFinal=*/XML_FALSE); + + bool success = true; + if ((status == XML_STATUS_OK) != expectedSuccess) { + success = false; + } + if ((status == XML_STATUS_ERROR) + && (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)) { + success = false; + } + + if (! success) { + fprintf( + stderr, + "FAIL case %2u (%sat name start, %u-byte sequence, error code %d)\n", + (unsigned)i + 1u, atNameStart[j] ? " " : "not ", + (unsigned)strlen(cases[i].tagName), XML_GetErrorCode(parser)); + failCount++; + } + + XML_ParserFree(parser); + } + } + + if (failCount > 0) { + fail("UTF-8 regression detected"); + } +} +END_TEST + /* Test trailing spaces in elements are accepted */ static void XMLCALL record_element_end_handler(void *userData, const XML_Char *name) { @@ -6151,6 +6350,14 @@ START_TEST(test_bad_doctype) { } END_TEST +START_TEST(test_bad_doctype_utf8) { + const char *text = "<!DOCTYPE \xDB\x25" + "doc><doc/>"; // [1101 1011] [<0>010 0101] + expect_failure(text, XML_ERROR_INVALID_TOKEN, + "Invalid UTF-8 in DOCTYPE not faulted"); +} +END_TEST + START_TEST(test_bad_doctype_utf16) { const char text[] = /* <!DOCTYPE doc [ \x06f2 ]><doc/> @@ -7196,6 +7403,35 @@ START_TEST(test_ns_double_colon_doctype) { } END_TEST +START_TEST(test_ns_separator_in_uri) { + struct test_case { + enum XML_Status expectedStatus; + const char *doc; + }; + struct test_case cases[] = { + {XML_STATUS_OK, "<doc xmlns='one_two' />"}, + {XML_STATUS_ERROR, "<doc xmlns='one
two' />"}, + }; + + size_t i = 0; + size_t failCount = 0; + for (; i < sizeof(cases) / sizeof(cases[0]); i++) { + XML_Parser parser = XML_ParserCreateNS(NULL, '\n'); + XML_SetElementHandler(parser, dummy_start_element, dummy_end_element); + if (XML_Parse(parser, cases[i].doc, (int)strlen(cases[i].doc), + /*isFinal*/ XML_TRUE) + != cases[i].expectedStatus) { + failCount++; + } + XML_ParserFree(parser); + } + + if (failCount) { + fail("Namespace separator handling is broken"); + } +} +END_TEST + /* Control variable; the number of times duff_allocator() will successfully * allocate */ #define ALLOC_ALWAYS_SUCCEED (-1) @@ -7352,7 +7588,7 @@ START_TEST(test_misc_version) { fail("Version mismatch"); #if ! defined(XML_UNICODE) || defined(XML_UNICODE_WCHAR_T) - if (xcstrcmp(version_text, XCS("expat_2.4.3"))) /* needs bump on releases */ + if (xcstrcmp(version_text, XCS("expat_2.4.6"))) /* needs bump on releases */ fail("XML_*_VERSION in expat.h out of sync?\n"); #else /* If we have XML_UNICODE defined but not XML_UNICODE_WCHAR_T @@ -11286,7 +11522,7 @@ START_TEST(test_accounting_precision) { {"<p:e xmlns:p=\"https://domain.invalid/\" />", NULL, NULL, 0, filled_later}, {"<e k=\"&'><"\" />", NULL, NULL, - sizeof(XML_Char) * 5 /* number of predefined entites */, filled_later}, + sizeof(XML_Char) * 5 /* number of predefined entities */, filled_later}, {"<e1 xmlns='https://example.org/'>\n" " <e2 xmlns=''/>\n" "</e1>", @@ -11296,7 +11532,7 @@ START_TEST(test_accounting_precision) { {"<e>text</e>", NULL, NULL, 0, filled_later}, {"<e1><e2>text1<e3/>text2</e2></e1>", NULL, NULL, 0, filled_later}, {"<e>&'><"</e>", NULL, NULL, - sizeof(XML_Char) * 5 /* number of predefined entites */, filled_later}, + sizeof(XML_Char) * 5 /* number of predefined entities */, filled_later}, {"<e>A)</e>", NULL, NULL, 0, filled_later}, /* Prolog */ @@ -11703,6 +11939,7 @@ make_suite(void) { tcase_add_test(tc_basic, test_memory_allocation); tcase_add_test(tc_basic, test_default_current); tcase_add_test(tc_basic, test_dtd_elements); + tcase_add_test(tc_basic, test_dtd_elements_nesting); tcase_add_test__ifdef_xml_dtd(tc_basic, test_set_foreign_dtd); tcase_add_test__ifdef_xml_dtd(tc_basic, test_foreign_dtd_not_standalone); tcase_add_test__ifdef_xml_dtd(tc_basic, test_invalid_foreign_dtd); @@ -11731,6 +11968,9 @@ make_suite(void) { tcase_add_test(tc_basic, test_empty_parse); tcase_add_test(tc_basic, test_get_buffer_1); tcase_add_test(tc_basic, test_get_buffer_2); +#if defined(XML_CONTEXT_BYTES) + tcase_add_test(tc_basic, test_get_buffer_3_overflow); +#endif tcase_add_test(tc_basic, test_byte_info_at_end); tcase_add_test(tc_basic, test_byte_info_at_error); tcase_add_test(tc_basic, test_byte_info_at_cdata); @@ -11814,6 +12054,7 @@ make_suite(void) { tcase_add_test(tc_basic, test_ext_entity_utf8_non_bom); tcase_add_test(tc_basic, test_utf8_in_cdata_section); tcase_add_test(tc_basic, test_utf8_in_cdata_section_2); + tcase_add_test(tc_basic, test_utf8_in_start_tags); tcase_add_test(tc_basic, test_trailing_spaces_in_elements); tcase_add_test(tc_basic, test_utf16_attribute); tcase_add_test(tc_basic, test_utf16_second_attr); @@ -11822,6 +12063,7 @@ make_suite(void) { tcase_add_test(tc_basic, test_bad_attr_desc_keyword); tcase_add_test(tc_basic, test_bad_attr_desc_keyword_utf16); tcase_add_test(tc_basic, test_bad_doctype); + tcase_add_test(tc_basic, test_bad_doctype_utf8); tcase_add_test(tc_basic, test_bad_doctype_utf16); tcase_add_test(tc_basic, test_bad_doctype_plus); tcase_add_test(tc_basic, test_bad_doctype_star); @@ -11878,6 +12120,7 @@ make_suite(void) { tcase_add_test(tc_namespace, test_ns_utf16_doctype); tcase_add_test(tc_namespace, test_ns_invalid_doctype); tcase_add_test(tc_namespace, test_ns_double_colon_doctype); + tcase_add_test(tc_namespace, test_ns_separator_in_uri); suite_add_tcase(s, tc_misc); tcase_add_checked_fixture(tc_misc, NULL, basic_teardown); diff --git a/xmlwf/Makefile.in b/xmlwf/Makefile.in index bcb36c2c2b9f..93b13a9fda48 100644 --- a/xmlwf/Makefile.in +++ b/xmlwf/Makefile.in @@ -235,6 +235,7 @@ AWK = @AWK@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ +CMAKE_SHARED_LIBRARY_PREFIX = @CMAKE_SHARED_LIBRARY_PREFIX@ CPPFLAGS = @CPPFLAGS@ CSCOPE = @CSCOPE@ CTAGS = @CTAGS@ diff --git a/xmlwf/xmlfile.c b/xmlwf/xmlfile.c index e3d6259a55fa..e3521676c5b9 100644 --- a/xmlwf/xmlfile.c +++ b/xmlwf/xmlfile.c @@ -10,7 +10,7 @@ Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net> Copyright (c) 2002-2003 Fred L. Drake, Jr. <fdrake@users.sourceforge.net> Copyright (c) 2004-2006 Karl Waclawek <karl@waclawek.net> - Copyright (c) 2005-2007 Steven Solie <ssolie@users.sourceforge.net> + Copyright (c) 2005-2007 Steven Solie <steven@solie.ca> Copyright (c) 2016-2021 Sebastian Pipping <sebastian@pipping.org> Copyright (c) 2017 Rhodri James <rhodri@wildebeest.org.uk> Copyright (c) 2019 David Loffredo <loffredo@steptools.com> diff --git a/xmlwf/xmlwf.c b/xmlwf/xmlwf.c index 29118bb64620..b0cd212f78ae 100644 --- a/xmlwf/xmlwf.c +++ b/xmlwf/xmlwf.c @@ -10,8 +10,8 @@ Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net> Copyright (c) 2001-2003 Fred L. Drake, Jr. <fdrake@users.sourceforge.net> Copyright (c) 2004-2009 Karl Waclawek <karl@waclawek.net> - Copyright (c) 2005-2007 Steven Solie <ssolie@users.sourceforge.net> - Copyright (c) 2016-2021 Sebastian Pipping <sebastian@pipping.org> + Copyright (c) 2005-2007 Steven Solie <steven@solie.ca> + Copyright (c) 2016-2022 Sebastian Pipping <sebastian@pipping.org> Copyright (c) 2017 Rhodri James <rhodri@wildebeest.org.uk> Copyright (c) 2019 David Loffredo <loffredo@steptools.com> Copyright (c) 2020 Joe Orton <jorton@redhat.com> @@ -1175,9 +1175,9 @@ tmain(int argc, XML_Char **argv) { if (! userData.fp) { tperror(outName); exitCode = XMLWF_EXIT_OUTPUT_ERROR; + free(outName); + XML_ParserFree(parser); if (continueOnError) { - free(outName); - cleanupUserData(&userData); continue; } else { break; |