diff --git a/debian/changelog b/debian/changelog index 98bc03d..566edf2 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,14 @@ +libxml2 (2.12.7+dfsg+really2.9.14-2.1+deb13u3deepin1) unstable; urgency=medium + + * Fix CVE-2026-6653: entity amplification check rework + - Rework entity amplification check to fix billion-laughs attack + - Limit document size after entity substitution to 10x the size + before expansion + - Add saturation arithmetic to prevent 32-bit integer overflow + - Enable entity amplification check even when XML_PARSE_HUGE is set + + -- lichenggang Fri, 26 Jun 2026 22:29:41 +0800 + libxml2 (2.12.7+dfsg+really2.9.14-2.1+deb13u3) trixie; urgency=high * Non-maintainer upload. diff --git a/debian/patches/CVE-2026-6653.patch b/debian/patches/CVE-2026-6653.patch new file mode 100644 index 0000000..6e8078a --- /dev/null +++ b/debian/patches/CVE-2026-6653.patch @@ -0,0 +1,894 @@ +From 463bbeeca1805b5c4828f50d0fefc4eebaf620df Mon Sep 17 00:00:00 2001 +From: Nick Wellnhofer +Date: Mon, 19 Dec 2022 18:39:45 +0100 +Subject: [PATCH] entities: Rework entity amplification checks + +This commit implements robust detection of entity amplification attacks, +better known as the "billion laughs" attack. + +We now limit the size of the document after substitution of entities to +10 times the size before expansion. This guarantees linear behavior by +definition. There already was a similar check before, but the accounting +of "sizeentities" (size of external entities) and "sizeentcopy" (size of +all copies created by entity references) wasn't accurate. + +We also need saturation arithmetic since we're historically limited to +"unsigned long" which is 32-bit on many platforms. + +A maximum of 10 MB of substitutions is always allowed. This should make +use cases like DITA work which have caused problems in the past. + +The old checks based on the number of entities were removed. This is +accounted for by adding a fixed cost to each entity reference.
+ +Entity amplification checks are now enabled even if XML_PARSE_HUGE is +set. This option is mainly used to allow larger text nodes. Most users +were unaware that it also disabled entity expansion checks. + +Some of the limits might be adjusted later. If this change turns out to +affect legitimate use cases, we can add a separate parser option to +disable the checks. + +Fixes #294. +Fixes #345. + +diff --git a/SAX2.c 2026-04-30 17:19:07.126033729 +0530 b/SAX2.c 2026-04-30 17:19:07.126033729 +0530 +--- a/SAX2.c 2026-04-30 17:19:07.126033729 +0530 ++++ b/SAX2.c 2026-04-30 17:19:07.122033740 +0530 +@@ -414,6 +414,9 @@ + int oldcharset; + const xmlChar *oldencoding; + ++ unsigned long consumed; ++ size_t buffered; ++ + /* + * Ask the Entity resolver to load the damn thing + */ +@@ -481,6 +484,18 @@ + + while (ctxt->inputNr > 1) + xmlPopInput(ctxt); ++ ++ consumed = ctxt->input->consumed; ++ buffered = ctxt->input->cur - ctxt->input->base; ++ if (buffered > ULONG_MAX - consumed) ++ consumed = ULONG_MAX; ++ else ++ consumed += buffered; ++ if (consumed > ULONG_MAX - ctxt->sizeentities) ++ ctxt->sizeentities = ULONG_MAX; ++ else ++ ctxt->sizeentities += consumed; ++ + xmlFreeInputStream(ctxt->input); + xmlFree(ctxt->inputTab); + +diff --git a/entities.c 2026-04-30 17:19:07.126033729 +0530 b/entities.c 2026-04-30 17:19:07.126033729 +0530 +--- a/entities.c 2026-04-30 17:19:07.126033729 +0530 ++++ b/entities.c 2026-04-30 17:19:07.122033740 +0530 +@@ -38,35 +38,35 @@ + NULL, NULL, NULL, NULL, NULL, NULL, + BAD_CAST "<", BAD_CAST "<", 1, + XML_INTERNAL_PREDEFINED_ENTITY, +- NULL, NULL, NULL, NULL, 0, 1 ++ NULL, NULL, NULL, NULL, 0, 1, 0, 0 + }; + static xmlEntity xmlEntityGt = { + NULL, XML_ENTITY_DECL, BAD_CAST "gt", + NULL, NULL, NULL, NULL, NULL, NULL, + BAD_CAST ">", BAD_CAST ">", 1, + XML_INTERNAL_PREDEFINED_ENTITY, +- NULL, NULL, NULL, NULL, 0, 1 ++ NULL, NULL, NULL, NULL, 0, 1, 0, 0 + }; + static xmlEntity xmlEntityAmp = { + NULL, XML_ENTITY_DECL, BAD_CAST "amp", + 
NULL, NULL, NULL, NULL, NULL, NULL, + BAD_CAST "&", BAD_CAST "&", 1, + XML_INTERNAL_PREDEFINED_ENTITY, +- NULL, NULL, NULL, NULL, 0, 1 ++ NULL, NULL, NULL, NULL, 0, 1, 0, 0 + }; + static xmlEntity xmlEntityQuot = { + NULL, XML_ENTITY_DECL, BAD_CAST "quot", + NULL, NULL, NULL, NULL, NULL, NULL, + BAD_CAST "\"", BAD_CAST "\"", 1, + XML_INTERNAL_PREDEFINED_ENTITY, +- NULL, NULL, NULL, NULL, 0, 1 ++ NULL, NULL, NULL, NULL, 0, 1, 0, 0 + }; + static xmlEntity xmlEntityApos = { + NULL, XML_ENTITY_DECL, BAD_CAST "apos", + NULL, NULL, NULL, NULL, NULL, NULL, + BAD_CAST "'", BAD_CAST "'", 1, + XML_INTERNAL_PREDEFINED_ENTITY, +- NULL, NULL, NULL, NULL, 0, 1 ++ NULL, NULL, NULL, NULL, 0, 1, 0, 0 + }; + + /** +diff --git a/include/libxml/entities.h 2026-04-30 17:19:07.126033729 +0530 b/include/libxml/entities.h 2026-04-30 17:19:07.126033729 +0530 +--- a/include/libxml/entities.h 2026-04-30 17:19:07.126033729 +0530 ++++ b/include/libxml/entities.h 2026-04-30 17:19:07.122033740 +0530 +@@ -30,6 +30,11 @@ + XML_INTERNAL_PREDEFINED_ENTITY = 6 + } xmlEntityType; + ++#define XML_ENT_PARSED (1u << 0) ++#define XML_ENT_CHECKED (1u << 1) ++#define XML_ENT_VALIDATED (1u << 2) ++#define XML_ENT_EXPANDING (1u << 3) ++ + /* + * An unit of storage for an entity, contains the string, the value + * and the linkind data needed for the linking in the hash table. 
+@@ -60,6 +65,8 @@ + /* this is also used to count entities + * references done from that entity + * and if it contains '<' */ ++ int flags; /* various flags */ ++ unsigned long expandedSize; + }; + + /* +diff --git a/include/libxml/parser.h 2026-04-30 17:19:07.126033729 +0530 b/include/libxml/parser.h 2026-04-30 17:19:07.126033729 +0530 +--- a/include/libxml/parser.h 2026-04-30 17:19:07.126033729 +0530 ++++ b/include/libxml/parser.h 2026-04-30 17:19:07.122033740 +0530 +@@ -74,6 +74,8 @@ + const xmlChar *version; /* the version string for entity */ + int standalone; /* Was that entity marked standalone */ + int id; /* an unique identifier for the entity */ ++ unsigned long parentConsumed; /* consumed bytes from parents */ ++ xmlEntityPtr entity; /* entity, if any */ + }; + + /** +diff --git a/parser.c 2026-04-30 17:19:07.126033729 +0530 b/parser.c 2026-04-30 17:19:07.126033729 +0530 +--- a/parser.c 2026-04-30 17:19:07.126033729 +0530 ++++ b/parser.c 2026-04-30 17:19:07.124033734 +0530 +@@ -128,150 +128,7 @@ + */ + #define XML_PARSER_NON_LINEAR 10 + +-/* +- * xmlParserEntityCheck +- * +- * Function to check non-linear entity expansion behaviour +- * This is here to detect and stop exponential linear entity expansion +- * This is not a limitation of the parser but a safety +- * boundary feature. It can be disabled with the XML_PARSE_HUGE +- * parser option. 
+- */ +-static int +-xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size, +- xmlEntityPtr ent, size_t replacement) +-{ +- size_t consumed = 0; +- int i; +- +- if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE)) +- return (0); +- if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP) +- return (1); +- +- /* +- * This may look absurd but is needed to detect +- * entities problems +- */ +- if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && +- (ent->content != NULL) && (ent->checked == 0) && +- (ctxt->errNo != XML_ERR_ENTITY_LOOP)) { +- unsigned long oldnbent = ctxt->nbentities, diff; +- xmlChar *rep; +- +- ent->checked = 1; +- +- ++ctxt->depth; +- rep = xmlStringDecodeEntities(ctxt, ent->content, +- XML_SUBSTITUTE_REF, 0, 0, 0); +- --ctxt->depth; +- if ((rep == NULL) || (ctxt->errNo == XML_ERR_ENTITY_LOOP)) { +- ent->content[0] = 0; +- } +- +- diff = ctxt->nbentities - oldnbent + 1; +- if (diff > INT_MAX / 2) +- diff = INT_MAX / 2; +- ent->checked = diff * 2; +- if (rep != NULL) { +- if (xmlStrchr(rep, '<')) +- ent->checked |= 1; +- xmlFree(rep); +- rep = NULL; +- } +- } +- +- /* +- * Prevent entity exponential check, not just replacement while +- * parsing the DTD +- * The check is potentially costly so do that only once in a thousand +- */ +- if ((ctxt->instate == XML_PARSER_DTD) && (ctxt->nbentities > 10000) && +- (ctxt->nbentities % 1024 == 0)) { +- for (i = 0;i < ctxt->inputNr;i++) { +- consumed += ctxt->inputTab[i]->consumed + +- (ctxt->inputTab[i]->cur - ctxt->inputTab[i]->base); +- } +- if (ctxt->nbentities > consumed * XML_PARSER_NON_LINEAR) { +- xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); +- ctxt->instate = XML_PARSER_EOF; +- return (1); +- } +- consumed = 0; +- } +- +- +- +- if (replacement != 0) { +- if (replacement < XML_MAX_TEXT_LENGTH) +- return(0); +- +- /* +- * If the volume of entity copy reaches 10 times the +- * amount of parsed data and over the large text threshold +- * then that's very likely to be an abuse. 
+- */ +- if (ctxt->input != NULL) { +- consumed = ctxt->input->consumed + +- (ctxt->input->cur - ctxt->input->base); +- } +- consumed += ctxt->sizeentities; +- +- if (replacement < XML_PARSER_NON_LINEAR * consumed) +- return(0); +- } else if (size != 0) { +- /* +- * Do the check based on the replacement size of the entity +- */ +- if (size < XML_PARSER_BIG_ENTITY) +- return(0); +- +- /* +- * A limit on the amount of text data reasonably used +- */ +- if (ctxt->input != NULL) { +- consumed = ctxt->input->consumed + +- (ctxt->input->cur - ctxt->input->base); +- } +- consumed += ctxt->sizeentities; +- +- if ((size < XML_PARSER_NON_LINEAR * consumed) && +- (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed)) +- return (0); +- } else if (ent != NULL) { +- /* +- * use the number of parsed entities in the replacement +- */ +- size = ent->checked / 2; +- +- /* +- * The amount of data parsed counting entities size only once +- */ +- if (ctxt->input != NULL) { +- consumed = ctxt->input->consumed + +- (ctxt->input->cur - ctxt->input->base); +- } +- consumed += ctxt->sizeentities; +- +- /* +- * Check the density of entities for the amount of data +- * knowing an entity reference will take at least 3 bytes +- */ +- if (size * 3 < consumed * XML_PARSER_NON_LINEAR) +- return (0); +- } else { +- /* +- * strange we got no data for checking +- */ +- if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) && +- (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) || +- (ctxt->nbentities <= 10000)) +- return (0); +- } +- xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); +- return (1); +-} +- ++#define XML_ENT_FIXED_COST 50 + /** + * xmlParserMaxDepth: + * +@@ -867,6 +724,84 @@ + info1, info2, info3); + } + ++static void ++xmlSaturatedAdd(unsigned long *dst, unsigned long val) { ++ if (val > ULONG_MAX - *dst) ++ *dst = ULONG_MAX; ++ else ++ *dst += val; ++} ++ ++static void ++xmlSaturatedAddSizeT(unsigned long *dst, unsigned long val) { ++ if (val > ULONG_MAX - *dst) ++ *dst = 
ULONG_MAX; ++ else ++ *dst += val; ++} ++ ++/** ++ * xmlParserEntityCheck: ++ * @ctxt: parser context ++ * @extra: sum of unexpanded entity sizes ++ * ++ * Check for non-linear entity expansion behaviour. ++ * ++ * In some cases like xmlStringDecodeEntities, this function is called ++ * for each, possibly nested entity and its unexpanded content length. ++ * ++ * In other cases like xmlParseReference, it's only called for each ++ * top-level entity with its unexpanded content length plus the sum of ++ * the unexpanded content lengths (plus fixed cost) of all nested ++ * entities. ++ * ++ * Summing the unexpanded lengths also adds the length of the reference. ++ * This is by design. Taking the length of the entity name into account ++ * discourages attacks that try to waste CPU time with abusively long ++ * entity names. See test/recurse/lol6.xml for example. Each call also ++ * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with ++ * short entities. ++ * ++ * Returns 1 on error, 0 on success. ++ */ ++static int ++xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra) ++{ ++ unsigned long consumed; ++ ++ /* ++ * Compute total consumed bytes so far, including input streams of ++ * external entities. ++ */ ++ consumed = ctxt->input->parentConsumed; ++ xmlSaturatedAdd(&consumed, ctxt->input->consumed); ++ xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base); ++ xmlSaturatedAdd(&consumed, ctxt->sizeentities); ++ ++ /* ++ * Add extra cost and some fixed cost. ++ */ ++ xmlSaturatedAdd(&ctxt->sizeentcopy, extra); ++ xmlSaturatedAdd(&ctxt->sizeentcopy, XML_ENT_FIXED_COST); ++ ++ /* ++ * It's important to always use saturation arithmetic when tracking ++ * entity sizes to make the size checks reliable. If "sizeentcopy" ++ * overflows, we have to abort. 
++ */ ++ if ((ctxt->sizeentcopy > XML_MAX_TEXT_LENGTH) && ++ ((ctxt->sizeentcopy >= ULONG_MAX) || ++ (ctxt->sizeentcopy / XML_PARSER_NON_LINEAR > consumed))) { ++ xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP, ++ "Maximum entity amplification factor exceeded"); ++ xmlHaltParser(ctxt); ++ return(1); ++ } ++ ++ return(0); ++} ++ ++ + /************************************************************************ + * * + * Library wide options * +@@ -2233,8 +2168,28 @@ + break; + xmlParsePEReference(ctxt); + } else if (CUR == 0) { ++ unsigned long consumed; ++ xmlEntityPtr ent; + if (ctxt->inputNr <= 1) + break; ++ consumed = ctxt->input->consumed; ++ xmlSaturatedAddSizeT(&consumed, ++ ctxt->input->cur - ctxt->input->base); ++ ++ /* ++ * Add to sizeentities when parsing an external entity ++ * for the first time. ++ */ ++ ent = ctxt->input->entity; ++ if ((ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) && ++ ((ent->flags & XML_ENT_PARSED) == 0)) { ++ ent->flags |= XML_ENT_PARSED; ++ ++ xmlSaturatedAdd(&ctxt->sizeentities, consumed); ++ } ++ ++ xmlParserEntityCheck(ctxt, consumed); ++ + xmlPopInput(ctxt); + } else { + break; +@@ -2647,9 +2602,10 @@ + * Returns A newly allocated string with the substitution done. The caller + * must deallocate it ! 
+ */ +-xmlChar * +-xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, +- int what, xmlChar end, xmlChar end2, xmlChar end3) { ++static xmlChar * ++xmlStringDecodeEntitiesInt(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, ++ int what, xmlChar end, xmlChar end2, xmlChar end3, ++ int check) { + xmlChar *buffer = NULL; + size_t buffer_size = 0; + size_t nbchars = 0; +@@ -2660,7 +2616,7 @@ + xmlEntityPtr ent; + int c,l; + +- if ((ctxt == NULL) || (str == NULL) || (len < 0)) ++ if (str == NULL) + return(NULL); + last = str + len; + +@@ -2705,7 +2661,6 @@ + "String decoding Entity Reference: %.30s\n", + str); + ent = xmlParseStringEntityRef(ctxt, &str); +- xmlParserEntityCheck(ctxt, 0, ent, 0); + if (ent != NULL) + ctxt->nbentities += ent->checked / 2; + if ((ent != NULL) && +@@ -2721,9 +2676,11 @@ + goto int_error; + } + } else if ((ent != NULL) && (ent->content != NULL)) { ++ if ((check) && (xmlParserEntityCheck(ctxt, ent->length))) ++ goto int_error; + ctxt->depth++; +- rep = xmlStringDecodeEntities(ctxt, ent->content, what, +- 0, 0, 0); ++ rep = xmlStringDecodeEntitiesInt(ctxt, ent->content, ++ ent->length, what, 0, 0, 0, check); + ctxt->depth--; + if (rep == NULL) { + ent->content[0] = 0; +@@ -2734,8 +2691,6 @@ + while (*current != 0) { /* non input consuming loop */ + buffer[nbchars++] = *current++; + if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { +- if (xmlParserEntityCheck(ctxt, nbchars, ent, 0)) +- goto int_error; + growBuffer(buffer, XML_PARSER_BUFFER_SIZE); + } + } +@@ -2758,7 +2713,6 @@ + xmlGenericError(xmlGenericErrorContext, + "String decoding PE Reference: %.30s\n", str); + ent = xmlParseStringPEReference(ctxt, &str); +- xmlParserEntityCheck(ctxt, 0, ent, 0); + if (ent != NULL) + ctxt->nbentities += ent->checked / 2; + if (ent != NULL) { +@@ -2779,9 +2733,11 @@ + ent->name, NULL); + } + } ++ if ((check) && (xmlParserEntityCheck(ctxt, ent->length))) ++ goto int_error; + ctxt->depth++; +- rep = 
xmlStringDecodeEntities(ctxt, ent->content, what, +- 0, 0, 0); ++ rep = xmlStringDecodeEntitiesInt(ctxt, ent->content, ++ ent->length, what, 0, 0, 0, check); + ctxt->depth--; + if (rep == NULL) { + if (ent->content != NULL) +@@ -2792,8 +2748,6 @@ + while (*current != 0) { /* non input consuming loop */ + buffer[nbchars++] = *current++; + if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { +- if (xmlParserEntityCheck(ctxt, nbchars, ent, 0)) +- goto int_error; + growBuffer(buffer, XML_PARSER_BUFFER_SIZE); + } + } +@@ -2825,6 +2779,36 @@ + return(NULL); + } + ++ ++/** ++ * xmlStringLenDecodeEntities: ++ * @ctxt: the parser context ++ * @str: the input string ++ * @len: the string length ++ * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF ++ * @end: an end marker xmlChar, 0 if none ++ * @end2: an end marker xmlChar, 0 if none ++ * @end3: an end marker xmlChar, 0 if none ++ * ++ * Takes a entity string content and process to do the adequate substitutions. ++ * ++ * [67] Reference ::= EntityRef | CharRef ++ * ++ * [69] PEReference ::= '%' Name ';' ++ * ++ * Returns A newly allocated string with the substitution done. The caller ++ * must deallocate it ! 
++ */ ++xmlChar * ++xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, ++ int what, xmlChar end, xmlChar end2, ++ xmlChar end3) { ++ if ((ctxt == NULL) || (str == NULL) || (len < 0)) ++ return(NULL); ++ return(xmlStringDecodeEntitiesInt(ctxt, str, len, what, ++ end, end2, end3, 0)); ++} ++ + /** + * xmlStringDecodeEntities: + * @ctxt: the parser context +@@ -2847,8 +2831,8 @@ + xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what, + xmlChar end, xmlChar end2, xmlChar end3) { + if ((ctxt == NULL) || (str == NULL)) return(NULL); +- return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what, +- end, end2, end3)); ++ return(xmlStringDecodeEntitiesInt(ctxt, str, xmlStrlen(str), what, ++ end, end2, end3, 0)); + } + + /************************************************************************ +@@ -3899,8 +3883,8 @@ + * so XML_SUBSTITUTE_REF is not set here. + */ + ++ctxt->depth; +- ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF, +- 0, 0, 0); ++ ret = xmlStringDecodeEntitiesInt(ctxt, buf, len, XML_SUBSTITUTE_PEREF, ++ 0, 0, 0, /* check */ 1); + --ctxt->depth; + if (orig != NULL) { + *orig = buf; +@@ -4020,10 +4004,12 @@ + } else if ((ent != NULL) && + (ctxt->replaceEntities != 0)) { + if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) { ++ if (xmlParserEntityCheck(ctxt, ent->length)) ++ goto error; + ++ctxt->depth; +- rep = xmlStringDecodeEntities(ctxt, ent->content, +- XML_SUBSTITUTE_REF, +- 0, 0, 0); ++ rep = xmlStringDecodeEntitiesInt(ctxt, ent->content, ++ ent->length, XML_SUBSTITUTE_REF, 0, 0, 0, ++ /* check */ 1); + --ctxt->depth; + if (rep != NULL) { + current = rep; +@@ -4057,25 +4043,35 @@ + * entities problems + */ + if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && +- (ent->content != NULL) && (ent->checked == 0)) { +- unsigned long oldnbent = ctxt->nbentities, diff; +- +- ++ctxt->depth; +- rep = xmlStringDecodeEntities(ctxt, ent->content, +- XML_SUBSTITUTE_REF, 0, 0, 0); +- --ctxt->depth; 
+- +- diff = ctxt->nbentities - oldnbent + 1; +- if (diff > INT_MAX / 2) +- diff = INT_MAX / 2; +- ent->checked = diff * 2; +- if (rep != NULL) { +- if (xmlStrchr(rep, '<')) +- ent->checked |= 1; +- xmlFree(rep); +- rep = NULL; +- } else { +- ent->content[0] = 0; ++ (ent->content != NULL)) { ++ if (ent->checked == 0) { ++ unsigned long oldnbent = ctxt->nbentities; ++ unsigned long oldCopy = ctxt->sizeentcopy; ++ ++ ctxt->sizeentcopy = ent->length; ++ ++ ++ctxt->depth; ++ rep = xmlStringDecodeEntitiesInt(ctxt, ++ ent->content, ent->length, ++ XML_SUBSTITUTE_REF, 0, 0, 0, ++ /* check */ 1); ++ --ctxt->depth; ++ ++ ent->checked = ctxt->nbentities - oldnbent + 1; ++ ent->expandedSize = ctxt->sizeentcopy; ++ ++ if (rep != NULL) { ++ xmlFree(rep); ++ rep = NULL; ++ } else { ++ ent->content[0] = 0; ++ } ++ ++ if (xmlParserEntityCheck(ctxt, oldCopy)) ++ goto error; ++ } else { ++ if (xmlParserEntityCheck(ctxt, ent->expandedSize)) ++ goto error; + } + } + +@@ -7177,6 +7173,7 @@ + ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) || + (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) { + unsigned long oldnbent = ctxt->nbentities, diff; ++ unsigned long oldsizeentcopy = ctxt->sizeentcopy; + + /* + * This is a bit hackish but this seems the best +@@ -7188,6 +7185,7 @@ + user_data = NULL; + else + user_data = ctxt->userData; ++ ctxt->sizeentcopy = 0; + + /* + * Check that this entity is well formed +@@ -7212,7 +7210,7 @@ + xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR, + "invalid entity type found\n", NULL); + } +- ++ ent->expandedSize = ctxt->sizeentcopy; + /* + * Store the number of entities needing parsing for this entity + * content and do checkings +@@ -7229,7 +7227,7 @@ + xmlFreeNodeList(list); + return; + } +- if (xmlParserEntityCheck(ctxt, 0, ent, 0)) { ++ if (xmlParserEntityCheck(ctxt, oldsizeentcopy)) { + xmlFreeNodeList(list); + return; + } +@@ -7281,7 +7279,6 @@ + "Entity '%s' failed to parse\n", ent->name); + if (ent->content != NULL) + ent->content[0] = 0; 
+- xmlParserEntityCheck(ctxt, 0, ent, 0); + } else if (list != NULL) { + xmlFreeNodeList(list); + list = NULL; +@@ -7326,11 +7323,13 @@ + ctxt->depth--; + } else if (ent->etype == + XML_EXTERNAL_GENERAL_PARSED_ENTITY) { ++ unsigned long oldsizeentities = ctxt->sizeentities; + ctxt->depth++; + ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, + ctxt->sax, user_data, ctxt->depth, + ent->URI, ent->ExternalID, NULL); + ctxt->depth--; ++ ctxt->sizeentities = oldsizeentities; + } else { + ret = XML_ERR_ENTITY_PE_INTERNAL; + xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR, +@@ -7340,6 +7339,8 @@ + xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); + return; + } ++ if (xmlParserEntityCheck(ctxt, 0)) ++ return; + } + if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && + (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) { +@@ -7351,7 +7352,9 @@ + } + return; + } +- ++ if ((was_checked != 0) && ++ (xmlParserEntityCheck(ctxt, ent->expandedSize))) ++ return; + /* + * If we didn't get any children for the entity being built + */ +@@ -7389,13 +7392,6 @@ + xmlNodePtr nw = NULL, cur, firstChild = NULL; + + /* +- * We are copying here, make sure there is no abuse +- */ +- ctxt->sizeentcopy += ent->length + 5; +- if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy)) +- return; +- +- /* + * when operating on a reader, the entities definitions + * are always owning the entities subtree. 
+ if (ctxt->parseMode == XML_PARSE_READER) +@@ -7436,12 +7432,6 @@ + xmlNodePtr nw = NULL, cur, next, last, + firstChild = NULL; + +- /* +- * We are copying here, make sure there is no abuse +- */ +- ctxt->sizeentcopy += ent->length + 5; +- if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy)) +- return; + + /* + * Copy the entity child list and make it the new +@@ -7625,7 +7615,6 @@ + ctxt->sax->reference(ctxt->userData, name); + } + } +- xmlParserEntityCheck(ctxt, 0, ent, 0); + ctxt->valid = 0; + } + +@@ -7819,7 +7808,6 @@ + "Entity '%s' not defined\n", + name); + } +- xmlParserEntityCheck(ctxt, 0, ent, 0); + /* TODO ? check regressions ctxt->valid = 0; */ + } + +@@ -7986,7 +7974,6 @@ + name, NULL); + ctxt->valid = 0; + } +- xmlParserEntityCheck(ctxt, 0, NULL, 0); + } else { + /* + * Internal checking in case the entity quest barfed +@@ -8000,8 +7987,7 @@ + xmlChar start[4]; + xmlCharEncoding enc; + +- if (xmlParserEntityCheck(ctxt, 0, entity, 0)) +- return; ++ unsigned long parentConsumed; + + if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && + ((ctxt->options & XML_PARSE_NOENT) == 0) && +@@ -8012,11 +7998,16 @@ + (ctxt->validate == 0)) + return; + ++ parentConsumed = ctxt->input->parentConsumed; ++ xmlSaturatedAdd(&parentConsumed, ctxt->input->consumed); ++ xmlSaturatedAddSizeT(&parentConsumed, ++ ctxt->input->cur - ctxt->input->base); + input = xmlNewEntityInputStream(ctxt, entity); + if (xmlPushInput(ctxt, input) < 0) { + xmlFreeInputStream(input); + return; + } ++ input->parentConsumed = parentConsumed; + + if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) { + /* +@@ -8134,6 +8125,7 @@ + } + + if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) { ++ xmlSaturatedAdd(&ctxt->sizeentities, ctxt->input->consumed); + xmlPopInput(ctxt); + } else if (!IS_CHAR(c)) { + xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, +@@ -8143,6 +8135,7 @@ + return(-1); + } + entity->content = buf->content; ++ entity->length = buf->use; + buf->content = 
NULL; + xmlBufferFree(buf); + +@@ -8251,7 +8244,6 @@ + name, NULL); + ctxt->valid = 0; + } +- xmlParserEntityCheck(ctxt, 0, NULL, 0); + } else { + /* + * Internal checking in case the entity quest barfed +@@ -9165,6 +9157,9 @@ + NEXT; + SKIP_BLANKS; + val = xmlParseAttValueInternal(ctxt, len, alloc, normalize); ++ if (val == NULL) { ++ return(NULL); ++ } + if (normalize) { + /* + * Sometimes a second normalisation pass for spaces is needed +@@ -13197,8 +13192,15 @@ + * Also record the size of the entity parsed + */ + if (ctxt->input != NULL && oldctxt != NULL) { +- oldctxt->sizeentities += ctxt->input->consumed; +- oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base); ++ unsigned long consumed = ctxt->input->consumed; ++ ++ xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base); ++ ++ xmlSaturatedAdd(&oldctxt->sizeentities, consumed); ++ xmlSaturatedAdd(&oldctxt->sizeentities, ctxt->sizeentities); ++ ++ xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed); ++ xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy); + } + /* + * And record the last error if any +@@ -13463,6 +13465,18 @@ + oldctxt->nbentities += ctxt->nbentities; + + /* ++ * Also record the size of the entity parsed ++ */ ++ if (ctxt->input != NULL && oldctxt != NULL) { ++ unsigned long consumed = ctxt->input->consumed; ++ ++ xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base); ++ ++ xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed); ++ xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy); ++ } ++ ++ /* + * Also record the last error if any + */ + if (ctxt->lastError.code != XML_ERR_OK) +diff --git a/parserInternals.c 2026-04-30 17:19:07.126033729 +0530 b/parserInternals.c 2026-04-30 17:19:07.126033729 +0530 +--- a/parserInternals.c 2026-04-30 17:19:07.126033729 +0530 ++++ b/parserInternals.c 2026-04-30 17:19:07.124033734 +0530 +@@ -1446,8 +1446,11 @@ + break; + case XML_EXTERNAL_GENERAL_PARSED_ENTITY: + case XML_EXTERNAL_PARAMETER_ENTITY: +- 
return(xmlLoadExternalEntity((char *) entity->URI, +- (char *) entity->ExternalID, ctxt)); ++ input = xmlLoadExternalEntity((char *) entity->URI, ++ (char *) entity->ExternalID, ctxt); ++ if (input != NULL) ++ input->entity = entity; ++ return(input); + case XML_INTERNAL_GENERAL_ENTITY: + xmlErrInternal(ctxt, + "Internal entity %s without content !\n", +@@ -1478,6 +1481,7 @@ + input->cur = entity->content; + input->length = entity->length; + input->end = &entity->content[input->length]; ++ input->entity = entity; + return(input); + } + +diff --git a/testrecurse.c 2026-04-30 17:19:07.126033729 +0530 b/testrecurse.c 2026-04-30 17:19:07.126033729 +0530 +--- a/testrecurse.c 2026-04-30 17:19:07.126033729 +0530 ++++ b/testrecurse.c 2026-04-30 17:19:07.124033734 +0530 +@@ -166,7 +166,9 @@ + ]> \ + "; + +-static const char *segment = " &e; &f; &d;\n"; ++static const char *segment = ++ " &e; &f; &d;\n" ++ " _123456789_123456789_123456789_123456789\n"; + static const char *finish = ""; + + static int curseg = 0; diff --git a/debian/patches/series b/debian/patches/series index 33a3ec1..3e606e7 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -38,3 +38,4 @@ CVE-2026-0989.patch 0038-fix-memory-leak-in-issue-1054.patch 0039-schematron-fix-additional-memory-leaks-on-error-path.patch 0040-catalog-fix-stack-overflow-from-self-referencing-SGM.patch +CVE-2026-6653.patch