From 41cfc7df606d74d6cc475234401852dbc8414f62 Mon Sep 17 00:00:00 2001 From: Erik Miehling Date: Thu, 8 Jan 2026 19:10:38 +0000 Subject: [PATCH 1/9] Functionality to interface with AISteer360's steering pipelines Signed-off-by: Erik Miehling --- .pre-commit-config.yaml | 5 + .secrets.baseline | 4 +- README.md | 5 +- icx360/algorithms/mexgen/__init__.py | 2 +- icx360/algorithms/mexgen/mexgen.py | 6 +- icx360/utils/model_wrappers/__init__.py | 2 +- icx360/utils/model_wrappers/huggingface.py | 87 ++++++++++++ icx360/utils/scalarizers/prob.py | 146 ++++++++++----------- icx360/utils/segmenters/__init__.py | 6 +- 9 files changed, 181 insertions(+), 82 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index fe1f0b5..46505bf 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -25,3 +25,8 @@ repos: - id: isort name: isort (python) args: ["--profile", "black", "--filter-files"] + - repo: https://github.com/ibm/detect-secrets + rev: 0.13.1+ibm.62.dss + hooks: + - id: detect-secrets + additional_dependencies: [ boxsdk<4 ] diff --git a/.secrets.baseline b/.secrets.baseline index 39f9f42..c51f27a 100644 --- a/.secrets.baseline +++ b/.secrets.baseline @@ -3,7 +3,7 @@ "files": "^.secrets.baseline$", "lines": null }, - "generated_at": "2025-07-26T17:25:52Z", + "generated_at": "2026-01-08T19:07:43Z", "plugins_used": [ { "name": "AWSKeyDetector" @@ -187,7 +187,7 @@ } ] }, - "version": "0.13.1+ibm.61.dss", + "version": "0.13.1+ibm.62.dss", "word_list": { "file": null, "hash": null diff --git a/README.md b/README.md index 07a8499..86385e9 100644 --- a/README.md +++ b/README.md @@ -113,13 +113,13 @@ pytest -m "vllm" If you find ICX360 useful, please star the repository and cite our work as follows: ``` @misc{wei2025icx360, - title={{ICX360}: {In-Context eXplainability} 360 Toolkit}, + title={{ICX360}: {In-Context eXplainability} 360 Toolkit}, author={Dennis Wei and Ronny Luss and Xiaomeng Hu and Lucas Monteiro Paes and Pin-Yu Chen and Karthikeyan Natesan Ramamurthy and Erik Miehling and Inge Vejsbjerg and Hendrik Strobelt}, year={2025}, eprint={2511.10879}, archivePrefix={arXiv}, primaryClass={cs.CL}, - url={https://arxiv.org/abs/2511.10879}, + url={https://arxiv.org/abs/2511.10879}, } ``` @@ -139,4 +139,3 @@ Lets form a community around this toolkit! Ask a question, raise an issue, or ex ## IBM ❤️ Open Source AI The first release of ICX360 has been brought to you by IBM in the hope of building a larger community around this topic. - diff --git a/icx360/algorithms/mexgen/__init__.py b/icx360/algorithms/mexgen/__init__.py index dd6bc70..c7d7c38 100644 --- a/icx360/algorithms/mexgen/__init__.py +++ b/icx360/algorithms/mexgen/__init__.py @@ -2,6 +2,6 @@ Module containing submodules for MExGen C-LIME and MExGen L-SHAP explainers """ -from .mexgen import MExGenExplainer from .clime import CLIME from .lshap import LSHAP +from .mexgen import MExGenExplainer diff --git a/icx360/algorithms/mexgen/mexgen.py b/icx360/algorithms/mexgen/mexgen.py index 928bab1..330837b 100644 --- a/icx360/algorithms/mexgen/mexgen.py +++ b/icx360/algorithms/mexgen/mexgen.py @@ -12,7 +12,11 @@ from icx360.algorithms.lbbe import LocalBBExplainer from icx360.utils.model_wrappers import GeneratedOutput, HFModel from icx360.utils.scalarizers import ProbScalarizedModel, TextScalarizedModel -from icx360.utils.segmenters import SpaCySegmenter, exclude_non_alphanumeric, merge_non_alphanumeric +from icx360.utils.segmenters import ( + SpaCySegmenter, + exclude_non_alphanumeric, + merge_non_alphanumeric, +) class MExGenExplainer(LocalBBExplainer): diff --git a/icx360/utils/model_wrappers/__init__.py b/icx360/utils/model_wrappers/__init__.py index 646f793..c234c87 100644 --- a/icx360/utils/model_wrappers/__init__.py +++ b/icx360/utils/model_wrappers/__init__.py @@ -5,5 +5,5 @@ """ from .base_model_wrapper import GeneratedOutput, Model -from .huggingface import HFModel +from .huggingface import HFModel, PipelineHFModel from .vllm import VLLMModel diff --git a/icx360/utils/model_wrappers/huggingface.py b/icx360/utils/model_wrappers/huggingface.py index ac7fcd3..a2e5298 100644 --- a/icx360/utils/model_wrappers/huggingface.py +++ b/icx360/utils/model_wrappers/huggingface.py @@ -154,3 +154,90 @@ def generate(self, inputs, chat_template=False, system_prompt=None, tokenizer_kw output_obj = GeneratedOutput(output_ids=output_ids, output_text=output_text, output_token_count=output_token_count) return output_obj + + +class PipelineHFModel(HFModel): + """ + HFModel-compatible wrapper around a SteeringPipeline. + + """ + + def __init__(self, pipeline, tokenizer, runtime_kwargs: dict | None = None): + super().__init__(pipeline.model, tokenizer) + + self._pipeline = pipeline + self._device = pipeline.model.device + self._runtime_kwargs = runtime_kwargs + + def generate( + self, + inputs, + chat_template: bool = False, + system_prompt: str | None = None, + tokenizer_kwargs: dict = {}, + text_only: bool = True, + **kwargs, + ): + """ + Generate response from SteeringPipeline. + + Args: + inputs (str or List[str] or List[List[str]]): + A single input text, a list of input texts, or a list of segmented texts. + chat_template (bool): + Whether to apply chat template. + system_prompt (str or None): + System prompt to include in chat template. + tokenizer_kwargs (dict): + Additional keyword arguments for tokenizer. + text_only (bool): + Return only generated text (default) or an object containing additional outputs. + **kwargs (dict): + Additional keyword arguments for pipeline. + + Returns: + output_obj (List[str] or icx360.utils.model_wrappers.GeneratedOutput): + If text_only == True, a list of generated texts corresponding to inputs. + If text_only == False, a GeneratedOutput object containing the following: + output_ids: (num_inputs, output_token_count) torch.Tensor of generated token IDs. + output_text: List of generated texts. + output_token_count: Maximum number of generated tokens. + """ + + encoding = self.convert_input( + inputs, + chat_template=chat_template, + system_prompt=system_prompt, + **tokenizer_kwargs, + ) + input_ids = encoding["input_ids"] + attention_mask = encoding["attention_mask"] + input_length = input_ids.shape[1] + + runtime_kwargs = self._runtime_kwargs + + with torch.no_grad(): + output_ids = self._pipeline.generate( + input_ids=input_ids, + attention_mask=attention_mask, + runtime_kwargs=runtime_kwargs, + **kwargs, + ) + + if not self._model.config.is_encoder_decoder: + output_ids = output_ids[:, input_length:] + + output_text = self._tokenizer.batch_decode( + output_ids, + skip_special_tokens=True, + clean_up_tokenization_spaces=True, + ) + + if text_only: + return output_text + else: + return GeneratedOutput( + output_ids=output_ids, + output_text=output_text, + output_token_count=output_ids.shape[1], + ) diff --git a/icx360/utils/scalarizers/prob.py b/icx360/utils/scalarizers/prob.py index 5956d37..80772f0 100644 --- a/icx360/utils/scalarizers/prob.py +++ b/icx360/utils/scalarizers/prob.py @@ -10,9 +10,8 @@ import torch -from icx360.utils.model_wrappers import HFModel, VLLMModel +from icx360.utils.model_wrappers import HFModel, PipelineHFModel, VLLMModel from icx360.utils.scalarizers import Scalarizer -from icx360.utils.segmenters import find_unit_boundaries from icx360.utils.toma import toma_get_probs @@ -42,7 +41,7 @@ def __init__(self, model): def scalarize_output(self, inputs=None, outputs=None, ref_input=None, ref_output=None, chat_template=False, system_prompt=None, tokenizer_kwargs={}, transformation="log_prob_mean", **kwargs): """ - Compute probability of generating reference output (or each unit thereof) conditioned on inputs. + Compute probability of reference output conditioned on inputs. Args: inputs (str or List[str] or List[List[str]]): @@ -70,8 +69,8 @@ def scalarize_output(self, inputs=None, outputs=None, ref_input=None, ref_output Additional keyword arguments for model. Returns: - probs_transformed ((num_inputs, num_output_units) torch.Tensor): - Transformed probability of generating each unit of the reference output conditioned on each input. + probs_transformed ((num_inputs,) torch.Tensor): + Transformed probability of generating the reference output conditioned on each input. """ # Check for and convert inputs if inputs is None: @@ -83,27 +82,22 @@ def scalarize_output(self, inputs=None, outputs=None, ref_input=None, ref_output raise ValueError("ref_output must be provided for ProbScalarizedModel.scalarize_output()") # Compute log probabilities of reference output tokens conditioned on inputs - # Also find token boundaries of units of the reference output - if isinstance(self.model, HFModel): - log_probs, boundaries = self._compute_log_probs_hf(inputs, ref_output, **kwargs) + if isinstance(self.model, PipelineHFModel): + log_probs = self._compute_log_probs_pipeline(inputs, ref_output, **kwargs) + elif isinstance(self.model, HFModel): + log_probs = self._compute_log_probs_hf(inputs, ref_output, **kwargs) elif isinstance(self.model, VLLMModel): - log_probs, boundaries = self._compute_log_probs_vllm(inputs, ref_output, **kwargs) - - # Initialize transformed probabilities - num_output_units = len(boundaries) - 1 - probs_transformed = torch.zeros(log_probs.shape[0], num_output_units) - # Iterate over reference output units - for u in range(num_output_units): - # Transform probabilities - if transformation in ("log_prob_mean", "prob_geo_mean"): - if boundaries[u + 1] > boundaries[u]: - # Mean of log probabilities (only if this unit has a non-zero number of tokens) - probs_transformed[:, u] = log_probs[:, boundaries[u] : boundaries[u + 1]].mean(dim=1) - elif transformation in ("log_prob_sum", "prob_prod"): - # Sum of log probabilities - probs_transformed[:, u] = log_probs[:, boundaries[u] : boundaries[u + 1]].sum(dim=1) - else: - raise ValueError("Transformation not recognized") + log_probs = self._compute_log_probs_vllm(inputs, ref_output, **kwargs) + + # Transform probabilities + if transformation in ("log_prob_mean", "prob_geo_mean"): + # Mean of log probabilities + probs_transformed = log_probs.mean(dim=1) + elif transformation in ("log_prob_sum", "prob_prod"): + # Sum of log probabilities + probs_transformed = log_probs.sum(dim=1) + else: + raise ValueError("Transformation not recognized") if transformation.startswith("prob"): # Convert log probabilities to probabilities probs_transformed = probs_transformed.exp() @@ -125,28 +119,26 @@ def _compute_log_probs_hf(self, inputs, ref_output, **kwargs): Returns: log_probs ((num_inputs, gen_length) torch.Tensor): Log probabilities of reference output tokens. - boundaries (List[int]): - Token boundaries of units of the reference output. """ num_inputs = inputs["input_ids"].shape[0] # Get token IDs of reference output - output_ids = ref_output.output_ids + ref_output = ref_output.output_ids # Number of generated tokens in output # encoder-decoder output always begins with a fixed special token e.g. , # while decoder-only output has been truncated to only the generated response - gen_length = output_ids.shape[1] - self.model._model.config.is_encoder_decoder + gen_length = ref_output.shape[1] - self.model._model.config.is_encoder_decoder if num_inputs == 1 or not torch.cuda.is_available(): # Call underlying HuggingFace model on given input and output sequences to obtain logits - output_ids_expanded = output_ids.expand(num_inputs, -1) + ref_output_expanded = ref_output.expand(num_inputs, -1) with torch.no_grad(): if self.model._model.config.is_encoder_decoder: # Encoder-decoder model: pass inputs and reference output as separate arguments - output_dict = self.model._model(**inputs, decoder_input_ids=output_ids_expanded) + output_dict = self.model._model(**inputs, decoder_input_ids=ref_output_expanded) else: # Decoder-only model: concatenate inputs with reference output - combined_input_output = torch.cat([inputs["input_ids"], output_ids_expanded], dim=1) + combined_input_output = torch.cat([inputs["input_ids"], ref_output_expanded], dim=1) output_dict = self.model._model(combined_input_output) # Position where generated output starts (in concatenated input-output for decoder-only) @@ -156,9 +148,9 @@ def _compute_log_probs_hf(self, inputs, ref_output, **kwargs): scores = tuple(output_dict.logits[:, pos, :] for pos in range(gen_start - 1, gen_start + gen_length - 1)) # Compute probabilities of tokens in reference output - # NOTE: although output_ids_expanded and scores have different token lengths, + # NOTE: although ref_output_expanded and scores have different token lengths, # compute_transition_scores() seems to align their last positions - log_probs = self.model._model.compute_transition_scores(output_ids_expanded, scores, normalize_logits=True) + log_probs = self.model._model.compute_transition_scores(ref_output_expanded, scores, normalize_logits=True) else: # Call using toma @@ -167,18 +159,47 @@ def _compute_log_probs_hf(self, inputs, ref_output, **kwargs): # Call using toma batch_size_init = 2 ** ceil(log2(num_inputs)) - toma_get_probs(0, num_inputs, self.model._model, inputs, output_ids, log_probs, toma_initial_step=batch_size_init) + toma_get_probs(0, num_inputs, self.model._model, inputs, ref_output, log_probs, toma_initial_step=batch_size_init) - # Get list of reference output tokens - tokens = [] - for id in output_ids[0]: - tokens.append("" if id in self.model._tokenizer.all_special_ids else self.model._tokenizer.decode(id)) - # Find token boundaries of units of the reference output - boundaries = find_unit_boundaries(ref_output.output_text[0], tokens) + return log_probs - return log_probs, boundaries + def _compute_log_probs_pipeline(self, inputs, ref_output, **kwargs): + """ + Compute log probabilities of reference output tokens conditioned on inputs when self.model is a PipelineHFModel. - def _compute_log_probs_vllm(self, inputs, ref_output, max_inputs_per_call=200, **kwargs): + Delegates to the underlying SteeringPipeline.compute_log_probs. + """ + if not isinstance(self.model, PipelineHFModel): + raise TypeError("_compute_log_probs_pipeline requires a PipelineHFModel") + + pipeline_model = self.model # icx360.utils.model_wrappers.PipelineHFModel + pipeline = pipeline_model._pipeline # aisteer360.algorithms.core.SteeringPipeline + + # inputs is a transformers.BatchEncoding from convert_input() + input_ids = inputs["input_ids"] + attention_mask = inputs.get("attention_mask", None) + + # reference output token IDs + ref_output_ids = ref_output.output_ids + device = pipeline_model._device + + if ref_output_ids.device != device: + ref_output_ids = ref_output_ids.to(device) + + with torch.no_grad(): + log_probs = pipeline.compute_log_probs( + input_ids=input_ids, + attention_mask=attention_mask, + ref_output_ids=ref_output_ids, + runtime_kwargs=pipeline_model._runtime_kwargs, + **kwargs, + ) + + # log_probs must be shape: (num_inputs, gen_length) + return log_probs + + + def _compute_log_probs_vllm(self, inputs, ref_output, **kwargs): """ Compute log probabilities of reference output tokens conditioned on inputs for a VLLMModel. @@ -193,46 +214,25 @@ def _compute_log_probs_vllm(self, inputs, ref_output, max_inputs_per_call=200, * Returns: log_probs ((num_inputs, gen_length) torch.Tensor): Log probabilities of reference output tokens. - boundaries (List[int]): - Token boundaries of units of the reference output. """ # VLLM parameters for computing log probs of a given input + output without generating kwargs["logprobs"] = 0 kwargs["max_tokens"] = 0 kwargs["echo"] = True - # Number of batch inference calls - num_calls = ceil(len(inputs) / max_inputs_per_call) - # Call underlying VLLM model on inputs only to get their token lengths + completion = self.model._model.completions.create(model=self.model._model_name, prompt=inputs, **kwargs) input_lengths = [] - for call in range(num_calls): - if num_calls > 1: - print(f"Call {call + 1} of {num_calls}") - completion = self.model._model.completions.create( - model=self.model._model_name, - prompt=inputs[call * max_inputs_per_call : (call + 1) * max_inputs_per_call], - **kwargs - ) - for result in completion.choices: - input_lengths.append(len(result.logprobs.tokens)) + for result in completion.choices: + input_lengths.append(len(result.logprobs.tokens)) # Combined inputs + output - combined_input_output = [inp + "".join(ref_output.output_text[0]) for inp in inputs] + combined_input_output = [inp + ref_output.output_text[0] for inp in inputs] # Call VLLM model on combined inputs + output to get log probs + completion = self.model._model.completions.create(model=self.model._model_name, prompt=combined_input_output, **kwargs) log_probs = [] - for call in range(num_calls): - completion = self.model._model.completions.create( - model=self.model._model_name, - prompt=combined_input_output[call * max_inputs_per_call : (call + 1) * max_inputs_per_call], - **kwargs - ) - for i, result in enumerate(completion.choices): - log_probs.append(result.logprobs.token_logprobs[input_lengths[call * max_inputs_per_call + i]:]) - - # Find token boundaries of units of the reference output - boundaries = find_unit_boundaries(ref_output.output_text[0], - result.logprobs.tokens[input_lengths[call * max_inputs_per_call + i]:]) - - return torch.tensor(log_probs), boundaries + for i, result in enumerate(completion.choices): + log_probs.append(result.logprobs.token_logprobs[input_lengths[i]:]) + + return torch.tensor(log_probs) diff --git a/icx360/utils/segmenters/__init__.py b/icx360/utils/segmenters/__init__.py index 59de919..1e93c6b 100644 --- a/icx360/utils/segmenters/__init__.py +++ b/icx360/utils/segmenters/__init__.py @@ -5,4 +5,8 @@ """ from .spacy import SpaCySegmenter -from .utils import exclude_non_alphanumeric, find_unit_boundaries, merge_non_alphanumeric +from .utils import ( + exclude_non_alphanumeric, + find_unit_boundaries, + merge_non_alphanumeric, +) From 23a32b4c607e375ae16214c61706c625a25dac17 Mon Sep 17 00:00:00 2001 From: Dennis Wei Date: Thu, 8 Jan 2026 14:02:10 -0800 Subject: [PATCH 2/9] resolve conflict with PR #11 in prob.py Signed-off-by: Dennis Wei --- icx360/utils/scalarizers/prob.py | 105 +++++++++++++++++++++---------- 1 file changed, 71 insertions(+), 34 deletions(-) diff --git a/icx360/utils/scalarizers/prob.py b/icx360/utils/scalarizers/prob.py index 80772f0..4915b54 100644 --- a/icx360/utils/scalarizers/prob.py +++ b/icx360/utils/scalarizers/prob.py @@ -12,6 +12,7 @@ from icx360.utils.model_wrappers import HFModel, PipelineHFModel, VLLMModel from icx360.utils.scalarizers import Scalarizer +from icx360.utils.segmenters import find_unit_boundaries from icx360.utils.toma import toma_get_probs @@ -41,7 +42,7 @@ def __init__(self, model): def scalarize_output(self, inputs=None, outputs=None, ref_input=None, ref_output=None, chat_template=False, system_prompt=None, tokenizer_kwargs={}, transformation="log_prob_mean", **kwargs): """ - Compute probability of reference output conditioned on inputs. + Compute probability of generating reference output (or each unit thereof) conditioned on inputs. Args: inputs (str or List[str] or List[List[str]]): @@ -69,8 +70,8 @@ def scalarize_output(self, inputs=None, outputs=None, ref_input=None, ref_output Additional keyword arguments for model. Returns: - probs_transformed ((num_inputs,) torch.Tensor): - Transformed probability of generating the reference output conditioned on each input. + probs_transformed ((num_inputs, num_output_units) torch.Tensor): + Transformed probability of generating each unit of the reference output conditioned on each input. """ # Check for and convert inputs if inputs is None: @@ -85,19 +86,25 @@ def scalarize_output(self, inputs=None, outputs=None, ref_input=None, ref_output if isinstance(self.model, PipelineHFModel): log_probs = self._compute_log_probs_pipeline(inputs, ref_output, **kwargs) elif isinstance(self.model, HFModel): - log_probs = self._compute_log_probs_hf(inputs, ref_output, **kwargs) + log_probs, boundaries = self._compute_log_probs_hf(inputs, ref_output, **kwargs) elif isinstance(self.model, VLLMModel): - log_probs = self._compute_log_probs_vllm(inputs, ref_output, **kwargs) - - # Transform probabilities - if transformation in ("log_prob_mean", "prob_geo_mean"): - # Mean of log probabilities - probs_transformed = log_probs.mean(dim=1) - elif transformation in ("log_prob_sum", "prob_prod"): - # Sum of log probabilities - probs_transformed = log_probs.sum(dim=1) - else: - raise ValueError("Transformation not recognized") + log_probs, boundaries = self._compute_log_probs_vllm(inputs, ref_output, **kwargs) + + # Initialize transformed probabilities + num_output_units = len(boundaries) - 1 + probs_transformed = torch.zeros(log_probs.shape[0], num_output_units) + # Iterate over reference output units + for u in range(num_output_units): + # Transform probabilities + if transformation in ("log_prob_mean", "prob_geo_mean"): + if boundaries[u + 1] > boundaries[u]: + # Mean of log probabilities (only if this unit has a non-zero number of tokens) + probs_transformed[:, u] = log_probs[:, boundaries[u] : boundaries[u + 1]].mean(dim=1) + elif transformation in ("log_prob_sum", "prob_prod"): + # Sum of log probabilities + probs_transformed[:, u] = log_probs[:, boundaries[u] : boundaries[u + 1]].sum(dim=1) + else: + raise ValueError("Transformation not recognized") if transformation.startswith("prob"): # Convert log probabilities to probabilities probs_transformed = probs_transformed.exp() @@ -119,26 +126,28 @@ def _compute_log_probs_hf(self, inputs, ref_output, **kwargs): Returns: log_probs ((num_inputs, gen_length) torch.Tensor): Log probabilities of reference output tokens. + boundaries (List[int]): + Token boundaries of units of the reference output. """ num_inputs = inputs["input_ids"].shape[0] # Get token IDs of reference output - ref_output = ref_output.output_ids + output_ids = ref_output.output_ids # Number of generated tokens in output # encoder-decoder output always begins with a fixed special token e.g. , # while decoder-only output has been truncated to only the generated response - gen_length = ref_output.shape[1] - self.model._model.config.is_encoder_decoder + gen_length = output_ids.shape[1] - self.model._model.config.is_encoder_decoder if num_inputs == 1 or not torch.cuda.is_available(): # Call underlying HuggingFace model on given input and output sequences to obtain logits - ref_output_expanded = ref_output.expand(num_inputs, -1) + output_ids_expanded = output_ids.expand(num_inputs, -1) with torch.no_grad(): if self.model._model.config.is_encoder_decoder: # Encoder-decoder model: pass inputs and reference output as separate arguments - output_dict = self.model._model(**inputs, decoder_input_ids=ref_output_expanded) + output_dict = self.model._model(**inputs, decoder_input_ids=output_ids_expanded) else: # Decoder-only model: concatenate inputs with reference output - combined_input_output = torch.cat([inputs["input_ids"], ref_output_expanded], dim=1) + combined_input_output = torch.cat([inputs["input_ids"], output_ids_expanded], dim=1) output_dict = self.model._model(combined_input_output) # Position where generated output starts (in concatenated input-output for decoder-only) @@ -148,9 +157,9 @@ def _compute_log_probs_hf(self, inputs, ref_output, **kwargs): scores = tuple(output_dict.logits[:, pos, :] for pos in range(gen_start - 1, gen_start + gen_length - 1)) # Compute probabilities of tokens in reference output - # NOTE: although ref_output_expanded and scores have different token lengths, + # NOTE: although output_ids_expanded and scores have different token lengths, # compute_transition_scores() seems to align their last positions - log_probs = self.model._model.compute_transition_scores(ref_output_expanded, scores, normalize_logits=True) + log_probs = self.model._model.compute_transition_scores(output_ids_expanded, scores, normalize_logits=True) else: # Call using toma @@ -159,9 +168,16 @@ def _compute_log_probs_hf(self, inputs, ref_output, **kwargs): # Call using toma batch_size_init = 2 ** ceil(log2(num_inputs)) - toma_get_probs(0, num_inputs, self.model._model, inputs, ref_output, log_probs, toma_initial_step=batch_size_init) + toma_get_probs(0, num_inputs, self.model._model, inputs, output_ids, log_probs, toma_initial_step=batch_size_init) - return log_probs + # Get list of reference output tokens + tokens = [] + for id in output_ids[0]: + tokens.append("" if id in self.model._tokenizer.all_special_ids else self.model._tokenizer.decode(id)) + # Find token boundaries of units of the reference output + boundaries = find_unit_boundaries(ref_output.output_text[0], tokens) + + return log_probs, boundaries def _compute_log_probs_pipeline(self, inputs, ref_output, **kwargs): """ @@ -199,7 +215,7 @@ def _compute_log_probs_pipeline(self, inputs, ref_output, **kwargs): return log_probs - def _compute_log_probs_vllm(self, inputs, ref_output, **kwargs): + def _compute_log_probs_vllm(self, inputs, ref_output, max_inputs_per_call=200, **kwargs): """ Compute log probabilities of reference output tokens conditioned on inputs for a VLLMModel. @@ -214,25 +230,46 @@ def _compute_log_probs_vllm(self, inputs, ref_output, **kwargs): Returns: log_probs ((num_inputs, gen_length) torch.Tensor): Log probabilities of reference output tokens. + boundaries (List[int]): + Token boundaries of units of the reference output. """ # VLLM parameters for computing log probs of a given input + output without generating kwargs["logprobs"] = 0 kwargs["max_tokens"] = 0 kwargs["echo"] = True + # Number of batch inference calls + num_calls = ceil(len(inputs) / max_inputs_per_call) + # Call underlying VLLM model on inputs only to get their token lengths - completion = self.model._model.completions.create(model=self.model._model_name, prompt=inputs, **kwargs) input_lengths = [] - for result in completion.choices: - input_lengths.append(len(result.logprobs.tokens)) + for call in range(num_calls): + if num_calls > 1: + print(f"Call {call + 1} of {num_calls}") + completion = self.model._model.completions.create( + model=self.model._model_name, + prompt=inputs[call * max_inputs_per_call : (call + 1) * max_inputs_per_call], + **kwargs + ) + for result in completion.choices: + input_lengths.append(len(result.logprobs.tokens)) # Combined inputs + output - combined_input_output = [inp + ref_output.output_text[0] for inp in inputs] + combined_input_output = [inp + "".join(ref_output.output_text[0]) for inp in inputs] # Call VLLM model on combined inputs + output to get log probs - completion = self.model._model.completions.create(model=self.model._model_name, prompt=combined_input_output, **kwargs) log_probs = [] - for i, result in enumerate(completion.choices): - log_probs.append(result.logprobs.token_logprobs[input_lengths[i]:]) - - return torch.tensor(log_probs) + for call in range(num_calls): + completion = self.model._model.completions.create( + model=self.model._model_name, + prompt=combined_input_output[call * max_inputs_per_call : (call + 1) * max_inputs_per_call], + **kwargs + ) + for i, result in enumerate(completion.choices): + log_probs.append(result.logprobs.token_logprobs[input_lengths[call * max_inputs_per_call + i]:]) + + # Find token boundaries of units of the reference output + boundaries = find_unit_boundaries(ref_output.output_text[0], + result.logprobs.tokens[input_lengths[call * max_inputs_per_call + i]:]) + + return torch.tensor(log_probs), boundaries From 055558b04f46b37497251cfef98a116e862a4fc9 Mon Sep 17 00:00:00 2001 From: Dennis Wei Date: Thu, 8 Jan 2026 14:07:53 -0800 Subject: [PATCH 3/9] one more missing line Signed-off-by: Dennis Wei --- icx360/utils/scalarizers/prob.py | 1 + 1 file changed, 1 insertion(+) diff --git a/icx360/utils/scalarizers/prob.py b/icx360/utils/scalarizers/prob.py index 4915b54..cf07ea8 100644 --- a/icx360/utils/scalarizers/prob.py +++ b/icx360/utils/scalarizers/prob.py @@ -83,6 +83,7 @@ def scalarize_output(self, inputs=None, outputs=None, ref_input=None, ref_output raise ValueError("ref_output must be provided for ProbScalarizedModel.scalarize_output()") # Compute log probabilities of reference output tokens conditioned on inputs + # Also find token boundaries of units of the reference output if isinstance(self.model, PipelineHFModel): log_probs = self._compute_log_probs_pipeline(inputs, ref_output, **kwargs) elif isinstance(self.model, HFModel): From ecb33b7ce6a9d6154e37aa42ac1f33a5774474f6 Mon Sep 17 00:00:00 2001 From: Dennis Wei Date: Thu, 8 Jan 2026 14:17:32 -0800 Subject: [PATCH 4/9] return output unit boundaries from _compute_log_probs_pipeline to be compatible with PR #11 Signed-off-by: Dennis Wei --- icx360/utils/scalarizers/prob.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/icx360/utils/scalarizers/prob.py b/icx360/utils/scalarizers/prob.py index cf07ea8..1e13498 100644 --- a/icx360/utils/scalarizers/prob.py +++ b/icx360/utils/scalarizers/prob.py @@ -85,7 +85,7 @@ def scalarize_output(self, inputs=None, outputs=None, ref_input=None, ref_output # Compute log probabilities of reference output tokens conditioned on inputs # Also find token boundaries of units of the reference output if isinstance(self.model, PipelineHFModel): - log_probs = self._compute_log_probs_pipeline(inputs, ref_output, **kwargs) + log_probs, boundaries = self._compute_log_probs_pipeline(inputs, ref_output, **kwargs) elif isinstance(self.model, HFModel): log_probs, boundaries = self._compute_log_probs_hf(inputs, ref_output, **kwargs) elif isinstance(self.model, VLLMModel): @@ -212,8 +212,15 @@ def _compute_log_probs_pipeline(self, inputs, ref_output, **kwargs): **kwargs, ) + # Get list of reference output tokens + tokens = [] + for id in ref_output_ids[0]: + tokens.append("" if id in self.model._tokenizer.all_special_ids else self.model._tokenizer.decode(id)) + # Find token boundaries of units of the reference output + boundaries = find_unit_boundaries(ref_output.output_text[0], tokens) + # log_probs must be shape: (num_inputs, gen_length) - return log_probs + return log_probs, boundaries def _compute_log_probs_vllm(self, inputs, ref_output, max_inputs_per_call=200, **kwargs): From 4cab6be6dbfb6f03650389069d143e70245c87c6 Mon Sep 17 00:00:00 2001 From: Dennis Wei Date: Thu, 8 Jan 2026 14:29:44 -0800 Subject: [PATCH 5/9] complete docstrings Signed-off-by: Dennis Wei --- icx360/utils/model_wrappers/huggingface.py | 23 +++++++++++++++++++++- icx360/utils/scalarizers/prob.py | 14 +++++++++++++ 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/icx360/utils/model_wrappers/huggingface.py b/icx360/utils/model_wrappers/huggingface.py index a2e5298..a0fe0e5 100644 --- a/icx360/utils/model_wrappers/huggingface.py +++ b/icx360/utils/model_wrappers/huggingface.py @@ -160,13 +160,34 @@ class PipelineHFModel(HFModel): """ HFModel-compatible wrapper around a SteeringPipeline. + Attributes: + _pipeline (aisteer360.algorithms.core.SteeringPipeline): + AISteer360 SteeringPipeline object. + _model (transformers model object): + Underlying model object. + _tokenizer (transformers tokenizer): + Tokenizer corresponding to model. + _device (str): + Device on which the model resides. + _runtime_kwargs (dict or None): + Runtime keyword arguments. """ def __init__(self, pipeline, tokenizer, runtime_kwargs: dict | None = None): + """ + Initialize PipelineHFModel wrapper. + + Args: + pipeline (aisteer360.algorithms.core.SteeringPipeline): + AISteer360 SteeringPipeline object. + tokenizer (transformers tokenizer): + Tokenizer corresponding to model. + runtime_kwargs (dict or None): + Runtime keyword arguments. + """ super().__init__(pipeline.model, tokenizer) self._pipeline = pipeline - self._device = pipeline.model.device self._runtime_kwargs = runtime_kwargs def generate( diff --git a/icx360/utils/scalarizers/prob.py b/icx360/utils/scalarizers/prob.py index 1e13498..d80d9c7 100644 --- a/icx360/utils/scalarizers/prob.py +++ b/icx360/utils/scalarizers/prob.py @@ -185,6 +185,20 @@ def _compute_log_probs_pipeline(self, inputs, ref_output, **kwargs): Compute log probabilities of reference output tokens conditioned on inputs when self.model is a PipelineHFModel. Delegates to the underlying SteeringPipeline.compute_log_probs. + + Args: + inputs (transformers.BatchEncoding): + BatchEncoding of inputs produced by tokenizer. + ref_output (icx360.utils.model_wrappers.GeneratedOutput): + Reference output object containing a sequence of token IDs (ref_output.output_ids). + **kwargs (dict): + Additional keyword arguments for model. + + Returns: + log_probs ((num_inputs, gen_length) torch.Tensor): + Log probabilities of reference output tokens. + boundaries (List[int]): + Token boundaries of units of the reference output. """ if not isinstance(self.model, PipelineHFModel): raise TypeError("_compute_log_probs_pipeline requires a PipelineHFModel") From 1f0c02f54c7b7ea3f1c27a19d99eed773f973608 Mon Sep 17 00:00:00 2001 From: Dennis Wei Date: Thu, 8 Jan 2026 17:31:06 -0800 Subject: [PATCH 6/9] avoid circular import Signed-off-by: Dennis Wei --- icx360/algorithms/mexgen/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/icx360/algorithms/mexgen/__init__.py b/icx360/algorithms/mexgen/__init__.py index c7d7c38..dd6bc70 100644 --- a/icx360/algorithms/mexgen/__init__.py +++ b/icx360/algorithms/mexgen/__init__.py @@ -2,6 +2,6 @@ Module containing submodules for MExGen C-LIME and MExGen L-SHAP explainers """ +from .mexgen import MExGenExplainer from .clime import CLIME from .lshap import LSHAP -from .mexgen import MExGenExplainer From 1f1295b6dbe7013c9b05f6256dbb60a4391cadf9 Mon Sep 17 00:00:00 2001 From: Erik Miehling Date: Thu, 15 Jan 2026 11:25:12 +0000 Subject: [PATCH 7/9] Clarify runtime_kwargs comments; pre-commit check/fix on mexgen init Signed-off-by: Erik Miehling --- icx360/utils/model_wrappers/huggingface.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/icx360/utils/model_wrappers/huggingface.py b/icx360/utils/model_wrappers/huggingface.py index a0fe0e5..887c62d 100644 --- a/icx360/utils/model_wrappers/huggingface.py +++ b/icx360/utils/model_wrappers/huggingface.py @@ -170,7 +170,7 @@ class PipelineHFModel(HFModel): _device (str): Device on which the model resides. _runtime_kwargs (dict or None): - Runtime keyword arguments. + Optional per-call parameters for controls at runtime. """ def __init__(self, pipeline, tokenizer, runtime_kwargs: dict | None = None): @@ -183,7 +183,7 @@ def __init__(self, pipeline, tokenizer, runtime_kwargs: dict | None = None): tokenizer (transformers tokenizer): Tokenizer corresponding to model. runtime_kwargs (dict or None): - Runtime keyword arguments. + Optional per-call parameters for controls at runtime. """ super().__init__(pipeline.model, tokenizer) From ef97f93fc70a6abf1a4172392226c7aa9781b5d2 Mon Sep 17 00:00:00 2001 From: Dennis Wei Date: Thu, 15 Jan 2026 12:21:04 -0800 Subject: [PATCH 8/9] rename compute_log_probs to compute_logprobs Signed-off-by: Dennis Wei --- icx360/utils/scalarizers/prob.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/icx360/utils/scalarizers/prob.py b/icx360/utils/scalarizers/prob.py index d80d9c7..a62e327 100644 --- a/icx360/utils/scalarizers/prob.py +++ b/icx360/utils/scalarizers/prob.py @@ -184,7 +184,7 @@ def _compute_log_probs_pipeline(self, inputs, ref_output, **kwargs): """ Compute log probabilities of reference output tokens conditioned on inputs when self.model is a PipelineHFModel. - Delegates to the underlying SteeringPipeline.compute_log_probs. + Delegates to the underlying SteeringPipeline.compute_logprobs. Args: inputs (transformers.BatchEncoding): @@ -218,7 +218,7 @@ def _compute_log_probs_pipeline(self, inputs, ref_output, **kwargs): ref_output_ids = ref_output_ids.to(device) with torch.no_grad(): - log_probs = pipeline.compute_log_probs( + log_probs = pipeline.compute_logprobs( input_ids=input_ids, attention_mask=attention_mask, ref_output_ids=ref_output_ids, From 00e3470402895f8ae0f41b7adde0a019807fa397 Mon Sep 17 00:00:00 2001 From: Dennis Wei Date: Thu, 15 Jan 2026 12:25:59 -0800 Subject: [PATCH 9/9] don't over-truncate output tokens in PipelineHFModel.generate() Signed-off-by: Dennis Wei --- icx360/utils/model_wrappers/huggingface.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/icx360/utils/model_wrappers/huggingface.py b/icx360/utils/model_wrappers/huggingface.py index 887c62d..4868536 100644 --- a/icx360/utils/model_wrappers/huggingface.py +++ b/icx360/utils/model_wrappers/huggingface.py @@ -244,9 +244,7 @@ def generate( runtime_kwargs=runtime_kwargs, **kwargs, ) - - if not self._model.config.is_encoder_decoder: - output_ids = output_ids[:, input_length:] + # SteeringPipeline already truncates output tokens to generated tokens only, don't truncate again output_text = self._tokenizer.batch_decode( output_ids,