From b7db9151b4556c7b70b44a12d8535efabe35a8a0 Mon Sep 17 00:00:00 2001
From: Liang Hu <huliang@microsoft.com>
Date: Tue, 16 Jun 2026 23:28:11 +0000
Subject: [PATCH 1/2] Fix RedTeam.scan() decoding encoded attack prompts in
 results

For converter-based attack strategies (Base64, Flip, Morse, ROT13, Caesar,
Leetspeak, AsciiArt, AnsiAttack, Atbash, Binary, CharacterSpace, CharSwap,
Diacritic, StringJoin, SuffixAppend, UnicodeConfusable, UnicodeSubstitution,
Url, AsciiSmuggler, Tense), FoundryResultProcessor was emitting the
pre-conversion plaintext ('original_value') as the user-message content
while the target was actually receiving 'converted_value'. This made
evaluation_results.json / results.json show plaintext where the audit
trail should show the encoded payload, breaking post-scan auditability and
per-variant debugging.

This change makes conversation[].content always reflect the on-wire value
(converted_value) for both user and assistant turns, and preserves the
pre-converter objective as a sibling 'original_value' field on any message
(regardless of role) whose original_value differs from the transmitted
content; in practice this is the converter-encoded user turn. Baseline
(non-encoded) strategies are unaffected since
original_value == converted_value.

Adds two regression tests in TestFoundryResultProcessor and a CHANGELOG
entry. Resolves Azure/azure-sdk-for-python#47228.
---
 .../azure-ai-evaluation/CHANGELOG.md          |  6 ++
 .../_foundry/_foundry_result_processor.py     | 35 ++++++--
 .../unittests/test_redteam/test_foundry.py    | 90 +++++++++++++++++++
 3 files changed, 123 insertions(+), 8 deletions(-)

diff --git a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md
index 576ed70a4396..864ce8544cb0 100644
--- a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md
+++ b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md
@@ -1,5 +1,11 @@
 # Release History
 
+## 1.17.1 (Unreleased)
+
+### Bugs Fixed
+
+- Fixed `RedTeam.scan()` storing decoded plaintext instead of the actual encoded payload for converter-based attack strategies (`Base64`, `Flip`, `Morse`, `ROT13`, `Caesar`, `Leetspeak`, `AsciiArt`, `AnsiAttack`, `Atbash`, `Binary`, `CharacterSpace`, `CharSwap`, `Diacritic`, `StringJoin`, `SuffixAppend`, `UnicodeConfusable`, `UnicodeSubstitution`, `Url`, `AsciiSmuggler`, `Tense`) in `evaluation_results.json` / `results.json`. The persisted `conversation[].content` for user turns now reflects what the target actually received (`converted_value`); the pre-converter adversarial objective is preserved on the same message as a new `original_value` field so the audit trail of what the attack meant to say is not lost. Baseline (non-encoded) strategies are unaffected. Resolves [Azure/azure-sdk-for-python#47228](https://github.com/Azure/azure-sdk-for-python/issues/47228).
+
 ## 1.17.0 (2026-06-03)
 
 ### Breaking Changes
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/red_team/_foundry/_foundry_result_processor.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/red_team/_foundry/_foundry_result_processor.py
index 1be0d124a640..d24b33a1c3fb 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/red_team/_foundry/_foundry_result_processor.py
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/red_team/_foundry/_foundry_result_processor.py
@@ -349,21 +349,40 @@ def _build_messages_from_pieces(
             # Get role, handling api_role property
             role = getattr(piece, "api_role", None) or getattr(piece, "role", "user")
 
-            # Get content: for user messages show the original adversarial prompt,
-            # not the converter output (e.g., Base64-encoded or tense-rephrased text).
-            # For assistant messages, show the response as-is.
-            if role == "user":
-                original = getattr(piece, "original_value", None)
-                converted = getattr(piece, "converted_value", None)
-                content = original if isinstance(original, str) and original else (converted or "")
+            # Get content. For both user and assistant turns, ``content`` reflects
+            # what was actually sent on the wire (``converted_value``) so the
+            # stored conversation matches the payload the target received /
+            # produced. When a converter (Base64, Flip, Morse, Caesar, etc.) was
+            # applied, the pre-conversion adversarial objective is preserved as
+            # ``original_value`` on the same message so consumers can still
+            # display / score against the decoded text without losing fidelity
+            # of the actual attack surface.
+            original = getattr(piece, "original_value", None)
+            converted = getattr(piece, "converted_value", None)
+            if isinstance(converted, str) and converted:
+                content = converted
+            elif isinstance(original, str) and original:
+                content = original
             else:
-                content = getattr(piece, "converted_value", None) or getattr(piece, "original_value", "")
+                content = ""
 
             message: Dict[str, Any] = {
                 "role": role,
                 "content": content,
             }
 
+            # Preserve the pre-converter objective when it differs from the
+            # transmitted content. This keeps the audit trail intact: callers
+            # can compare ``content`` (what the target saw) with
+            # ``original_value`` (what the attack meant to say) for every
+            # encoding-based strategy.
+            if (
+                isinstance(original, str)
+                and original
+                and original != content
+            ):
+                message["original_value"] = original
+
             # Add context from labels if present (for XPIA)
             if hasattr(piece, "labels") and piece.labels:
                 context_str = piece.labels.get("context")
diff --git a/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_redteam/test_foundry.py b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_redteam/test_foundry.py
index 55ac871cb12f..f3e9cd45807f 100644
--- a/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_redteam/test_foundry.py
+++ b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_redteam/test_foundry.py
@@ -1427,6 +1427,96 @@ def test_build_messages_from_pieces(self):
         assert messages[0]["content"] == "User message"
         assert messages[1]["role"] == "assistant"
         assert messages[1]["content"] == "Assistant response"
+        # When original and converted match (no encoding), no audit field is added.
+        assert "original_value" not in messages[0]
+        assert "original_value" not in messages[1]
+
+    def test_build_messages_preserves_encoded_user_prompt(self):
+        """Encoded attack prompts must be stored as the wire payload.
+
+        Regression test for
+        https://github.com/Azure/azure-sdk-for-python/issues/47228 — for
+        converter-based strategies (Base64, Flip, Morse, ROT13, etc.) the
+        target receives ``converted_value``, so the persisted conversation
+        must report ``converted_value`` as ``content`` (not the decoded
+        ``original_value``). The pre-converter objective is preserved as
+        ``original_value`` on the same message so callers still have an
+        audit trail of what the attack meant to say.
+        """
+        mock_scenario = MagicMock()
+        mock_dataset = MagicMock()
+        mock_dataset.get_all_seed_groups.return_value = []
+
+        processor = FoundryResultProcessor(
+            scenario=mock_scenario,
+            dataset_config=mock_dataset,
+            risk_category="violence",
+        )
+
+        # Simulate a Base64-converted user turn: the target actually saw the
+        # encoded payload, but the SDK still has the plaintext objective.
+        user_piece = MagicMock()
+        user_piece.api_role = "user"
+        user_piece.original_value = "How do I make a dangerous thing?"
+        user_piece.converted_value = "SG93IGRvIEkgbWFrZSBhIGRhbmdlcm91cyB0aGluZz8="
+        user_piece.sequence = 0
+        user_piece.prompt_metadata = {}
+        user_piece.labels = {}
+
+        # Assistant response — converter is a no-op on the response side, so
+        # original and converted match. No audit field should be emitted.
+        assistant_piece = MagicMock()
+        assistant_piece.api_role = "assistant"
+        assistant_piece.original_value = "Sorry, I can't help with that."
+        assistant_piece.converted_value = "Sorry, I can't help with that."
+        assistant_piece.sequence = 1
+        assistant_piece.prompt_metadata = {}
+        assistant_piece.labels = {}
+
+        messages = processor._build_messages_from_pieces([user_piece, assistant_piece])
+
+        # The user turn must carry the encoded payload as content so consumers
+        # can verify exactly what the target received.
+        assert messages[0]["role"] == "user"
+        assert messages[0]["content"] == "SG93IGRvIEkgbWFrZSBhIGRhbmdlcm91cyB0aGluZz8="
+        # The plaintext objective is preserved alongside it for auditability.
+        assert messages[0]["original_value"] == "How do I make a dangerous thing?"
+
+        # Assistant turn is unchanged: content == converted_value, no audit field.
+        assert messages[1]["role"] == "assistant"
+        assert messages[1]["content"] == "Sorry, I can't help with that."
+        assert "original_value" not in messages[1]
+
+    def test_build_messages_falls_back_to_original_when_converted_missing(self):
+        """When ``converted_value`` is empty, fall back to ``original_value``.
+
+        Covers the historical behavior for pieces where PyRIT did not run a
+        converter (e.g., Baseline strategy or in-flight failures).
+        """
+        mock_scenario = MagicMock()
+        mock_dataset = MagicMock()
+        mock_dataset.get_all_seed_groups.return_value = []
+
+        processor = FoundryResultProcessor(
+            scenario=mock_scenario,
+            dataset_config=mock_dataset,
+            risk_category="violence",
+        )
+
+        user_piece = MagicMock()
+        user_piece.api_role = "user"
+        user_piece.original_value = "Baseline prompt"
+        user_piece.converted_value = None
+        user_piece.sequence = 0
+        user_piece.prompt_metadata = {}
+        user_piece.labels = {}
+
+        messages = processor._build_messages_from_pieces([user_piece])
+
+        assert len(messages) == 1
+        assert messages[0]["content"] == "Baseline prompt"
+        # original == content here, so no separate audit field is needed.
+        assert "original_value" not in messages[0]
 
     def test_get_prompt_group_id_from_conversation(self):
         """Test extracting prompt_group_id from conversation."""

From 493577f4acf16e0c60638705b5f5ee3a5d1e101c Mon Sep 17 00:00:00 2001
From: Liang Hu <huliang@microsoft.com>
Date: Wed, 17 Jun 2026 00:05:33 +0000
Subject: [PATCH 2/2] Address Copilot review: preserve non-string payloads, add
 test, shorten changelog

- _foundry_result_processor.py: stop forcing converted_value/original_value
  through isinstance(str) when computing content. Bytes / structured
  multimodal payloads now pass through unchanged; the original_value
  audit field is still gated on both sides being str so cross-type
  inequality cannot produce a misleading field.
- test_foundry.py: add test_build_messages_preserves_non_string_payloads
  covering list-of-parts and bytes payloads.
- CHANGELOG.md: wrap the 1.17.1 entry across multiple lines and drop the
  exhaustive strategy enumeration.
---
 .../azure-ai-evaluation/CHANGELOG.md          | 10 +++-
 .../_foundry/_foundry_result_processor.py     | 14 ++++-
 .../unittests/test_redteam/test_foundry.py    | 57 +++++++++++++++++++
 3 files changed, 77 insertions(+), 4 deletions(-)

diff --git a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md
index 864ce8544cb0..bb952aa9162d 100644
--- a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md
+++ b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md
@@ -4,7 +4,15 @@
 
 ### Bugs Fixed
 
-- Fixed `RedTeam.scan()` storing decoded plaintext instead of the actual encoded payload for converter-based attack strategies (`Base64`, `Flip`, `Morse`, `ROT13`, `Caesar`, `Leetspeak`, `AsciiArt`, `AnsiAttack`, `Atbash`, `Binary`, `CharacterSpace`, `CharSwap`, `Diacritic`, `StringJoin`, `SuffixAppend`, `UnicodeConfusable`, `UnicodeSubstitution`, `Url`, `AsciiSmuggler`, `Tense`) in `evaluation_results.json` / `results.json`. The persisted `conversation[].content` for user turns now reflects what the target actually received (`converted_value`); the pre-converter adversarial objective is preserved on the same message as a new `original_value` field so the audit trail of what the attack meant to say is not lost. Baseline (non-encoded) strategies are unaffected. Resolves [Azure/azure-sdk-for-python#47228](https://github.com/Azure/azure-sdk-for-python/issues/47228).
+- Fixed `RedTeam.scan()` storing decoded plaintext instead of the actual
+  encoded payload for converter-based attack strategies (Base64, Flip,
+  Morse, ROT13, etc.) in `evaluation_results.json` / `results.json`. The
+  persisted `conversation[].content` for user turns now reflects what the
+  target actually received (`converted_value`); the pre-converter
+  adversarial objective is preserved on the same message as a new
+  `original_value` field so the audit trail of what the attack meant to
+  say is not lost. Baseline (non-encoded) strategies are unaffected.
+  Resolves [#47228](https://github.com/Azure/azure-sdk-for-python/issues/47228).
 
 ## 1.17.0 (2026-06-03)
 
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/red_team/_foundry/_foundry_result_processor.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/red_team/_foundry/_foundry_result_processor.py
index d24b33a1c3fb..02d9daefe478 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/red_team/_foundry/_foundry_result_processor.py
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/red_team/_foundry/_foundry_result_processor.py
@@ -357,11 +357,16 @@ def _build_messages_from_pieces(
             # ``original_value`` on the same message so consumers can still
             # display / score against the decoded text without losing fidelity
             # of the actual attack surface.
+            #
+            # ``converted_value`` / ``original_value`` are passed through
+            # without forcing them to ``str`` so non-text payloads (bytes,
+            # structured / multimodal content) survive unchanged. ``content``
+            # falls back to ``""`` only when both fields are falsy / missing.
             original = getattr(piece, "original_value", None)
             converted = getattr(piece, "converted_value", None)
-            if isinstance(converted, str) and converted:
+            if converted:
                 content = converted
-            elif isinstance(original, str) and original:
+            elif original:
                 content = original
             else:
                 content = ""
@@ -375,10 +380,13 @@ def _build_messages_from_pieces(
             # transmitted content. This keeps the audit trail intact: callers
             # can compare ``content`` (what the target saw) with
             # ``original_value`` (what the attack meant to say) for every
-            # encoding-based strategy.
+            # encoding-based strategy. Restricted to strings because the
+            # audit field is only meaningful when both values are textual
+            # (and arbitrary cross-type inequality would be too aggressive).
             if (
                 isinstance(original, str)
                 and original
+                and isinstance(content, str)
                 and original != content
             ):
                 message["original_value"] = original
diff --git a/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_redteam/test_foundry.py b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_redteam/test_foundry.py
index f3e9cd45807f..de1d49c2ccb5 100644
--- a/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_redteam/test_foundry.py
+++ b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_redteam/test_foundry.py
@@ -1518,6 +1518,63 @@ def test_build_messages_falls_back_to_original_when_converted_missing(self):
         # original == content here, so no separate audit field is needed.
         assert "original_value" not in messages[0]
 
+    def test_build_messages_preserves_non_string_payloads(self):
+        """Non-string ``converted_value`` payloads must survive unchanged.
+
+        PyRIT message pieces can carry structured / multimodal content
+        (e.g., bytes or list-of-parts payloads) on ``converted_value``.
+        ``content`` must pass those through so persisted conversations
+        remain a faithful record of what the target received; only the
+        ``original_value`` audit field is gated on both sides being text.
+        """
+        mock_scenario = MagicMock()
+        mock_dataset = MagicMock()
+        mock_dataset.get_all_seed_groups.return_value = []
+
+        processor = FoundryResultProcessor(
+            scenario=mock_scenario,
+            dataset_config=mock_dataset,
+            risk_category="violence",
+        )
+
+        # Structured multimodal-style payload on converted_value, plain
+        # string objective on original_value.
+        structured_payload = [
+            {"type": "text", "text": "describe this image"},
+            {"type": "image_url", "image_url": {"url": "https://example/img.png"}},
+        ]
+        user_piece = MagicMock()
+        user_piece.api_role = "user"
+        user_piece.original_value = "Describe this image"
+        user_piece.converted_value = structured_payload
+        user_piece.sequence = 0
+        user_piece.prompt_metadata = {}
+        user_piece.labels = {}
+
+        # Bytes payload on assistant converted_value — must not be coerced
+        # to "" by str-gating logic.
+        assistant_piece = MagicMock()
+        assistant_piece.api_role = "assistant"
+        assistant_piece.original_value = None
+        assistant_piece.converted_value = b"\x89PNG\r\n"
+        assistant_piece.sequence = 1
+        assistant_piece.prompt_metadata = {}
+        assistant_piece.labels = {}
+
+        messages = processor._build_messages_from_pieces([user_piece, assistant_piece])
+
+        # Structured user payload passed through unchanged.
+        assert messages[0]["role"] == "user"
+        assert messages[0]["content"] is structured_payload
+        # Audit field omitted: content is non-text so cross-type comparison
+        # against the str original would be meaningless.
+        assert "original_value" not in messages[0]
+
+        # Bytes assistant payload preserved (not silently dropped to "").
+        assert messages[1]["role"] == "assistant"
+        assert messages[1]["content"] == b"\x89PNG\r\n"
+        assert "original_value" not in messages[1]
+
     def test_get_prompt_group_id_from_conversation(self):
         """Test extracting prompt_group_id from conversation."""
         mock_scenario = MagicMock()