closeio · marcospri · Jun 8, 2026 · Jun 2, 2026 · Jun 2, 2026 · Jun 2, 2026
diff --git a/quotequail/_html.py b/quotequail/_html.py
@@ -1,4 +1,5 @@
 # HTML utils
+import contextlib
 import html
 from collections.abc import Iterator
 from typing import TYPE_CHECKING, TypeAlias
@@ -243,15 +244,18 @@ def get_html_tree(html_str: str) -> Element:
             el.attrib["__tag_name"] = f"{prefix}:{el.tag}"
             el.tag = "span"
 
-        elif ":" in el.tag:
+        elif ":" in el.tag and not any(c in el.tag for c in ('"', "=", " ")):
             # Outlook <o:p> padding: same treatment, round-tripped.
+            # Only applies to genuine namespace tags (e.g. o:p, v:shape).
+            # Tags that contain '"', '=', or spaces alongside ':' are garbage
+            # from malformed HTML (e.g. <ahref="https:...>) and must be
+            # flattened instead, since restoring them would raise ValueError.
             el.attrib["__tag_name"] = el.tag
             el.tag = "span"
 
-        elif "@" in el.tag or "=" in el.tag:
-            # Mail client forgot to escape <addr@domain> or used other
-            # XPath-special chars (like =) in tag names. Flatten back to
-            # visible text so the address actually renders.
+        elif ":" in el.tag or "@" in el.tag or "=" in el.tag:
+            # Malformed tag whose name contains XPath-special or otherwise
+            # invalid characters. Flatten back to visible text.
             attrs = "".join(
                 f' {k}="{html.escape(v, quote=True)}"'
                 for k, v in el.attrib.items()
@@ -283,7 +287,11 @@ def render_html_tree(tree: Element) -> str:
     for el in tree.iter():
         if "__tag_name" in el.attrib:
             actual_tag_name = el.attrib.pop("__tag_name")
-            el.tag = actual_tag_name
+            # If lxml rejects restoring the tag name (malformed input),
+            # leave the element as a span; __tag_name is already
+            # popped, so it won't appear in the output.
+            with contextlib.suppress(ValueError):
+                el.tag = actual_tag_name
 
     html_str = lxml.html.tostring(tree, encoding="utf8").decode("utf8")
 

diff --git a/tests/test_html.py b/tests/test_html.py
@@ -1,3 +1,6 @@
+import lxml.html
+import pytest
+
 from quotequail._html import (
     Position,
     get_html_tree,
@@ -106,9 +109,48 @@ def test_trim_before():
     assert render_html_tree(tree) == "<div>E</div>"
 
 
-def test_get_html_tree_flattens_at_pseudo_tag_with_attributes():
-    # Unescaped <addr@domain attr="..."> pseudo-tags must round-trip as
-    # visible text without losing attribute values.
-    html = '<div>x<addr@domain foo="bar">y</addr@domain>z</div>'
-    rendered = render_html_tree(get_html_tree(html))
-    assert rendered == '<div>x&lt;addr@domain foo="bar"&gt;yz</div>'
+@pytest.mark.parametrize(
+    ("html", "expected"),
+    [
+        # '@' in tag name — unescaped email-style pseudo-tag
+        (
+            '<div>x<addr@domain foo="bar">y</addr@domain>z</div>',
+            '<div>x&lt;addr@domain foo="bar"&gt;yz</div>',
+        ),
+        # ':' and '"' in tag name — lxml parses <ahref="https://..."> this way
+        (
+            '<div>x<ahref="https://example.com">click</ahref>z</div>',
+            '<div>x&lt;ahref="https: example.com"=""&gt;clickz</div>',
+        ),
+        # ':' and '=' in tag name — e.g. <a:b=c>
+        (
+            "<div>x<a:b=c>click</a:b>z</div>",
+            "<div>x&lt;a:b=c&gt;clickz</div>",
+        ),
+    ],
+)
+def test_get_html_tree_flattens_malformed_tags(html, expected):
+    # Tags whose names contain XPath-special or invalid characters
+    # must be rendered as escaped visible text rather than roundtripped as real
+    # tags, which would raise ValueError in lxml.
+    assert render_html_tree(get_html_tree(html)) == expected
+
+
+def test_get_html_tree_outlook_tag_roundtrip():
+    # Outlook uses <o:p> for paragraph padding. The tag must survive the
+    # get_html_tree → render_html_tree roundtrip unchanged.
+    html = "<div>foo<o:p></o:p>bar</div>"
+    assert (
+        render_html_tree(get_html_tree(html)) == "<div>foo<o:p></o:p>bar</div>"
+    )
+
+
+def test_render_html_tree_suppresses_space_in_stored_tag_name():
+    # If a tag name containing ':' and ' ' somehow ends up in
+    # __tag_name, render_html_tree must not raise ValueError.
+    tree = lxml.html.fragment_fromstring("<div><span>text</span></div>")
+    span = tree.find("span")
+    span.attrib["__tag_name"] = "o:p style"
+    result = render_html_tree(tree)
+    assert "text" in result
+    assert "__tag_name" not in result