diff --git a/privacy_guard/analysis/lia/lia_analysis_input.py b/privacy_guard/analysis/lia/lia_analysis_input.py index 08c7273..8929f76 100644 --- a/privacy_guard/analysis/lia/lia_analysis_input.py +++ b/privacy_guard/analysis/lia/lia_analysis_input.py @@ -22,11 +22,17 @@ class LIAAnalysisInput(BaseAnalysisInput): def __init__( self, + # pyrefly: ignore [bad-specialization] predictions: NDArray[float], + # pyrefly: ignore [bad-specialization] predictions_y1_generation: NDArray[float], + # pyrefly: ignore [bad-specialization] true_bits: NDArray[int], + # pyrefly: ignore [bad-specialization] y0: NDArray[int], + # pyrefly: ignore [bad-specialization] y1: NDArray[int], + # pyrefly: ignore [bad-specialization] received_labels: NDArray[int], ) -> None: """ diff --git a/privacy_guard/analysis/lia/lia_analysis_node.py b/privacy_guard/analysis/lia/lia_analysis_node.py index 711f256..bf2ff58 100644 --- a/privacy_guard/analysis/lia/lia_analysis_node.py +++ b/privacy_guard/analysis/lia/lia_analysis_node.py @@ -125,8 +125,11 @@ def compute_scores(self, i: int) -> Tuple[torch.Tensor, torch.Tensor]: Tuple[torch.Tensor, torch.Tensor]: scores for samples with training labels and reconstructed labels """ + # pyrefly: ignore [missing-attribute] received_labels = self._analysis_input.received_labels[i] + # pyrefly: ignore [missing-attribute] y1_probs = self._analysis_input.predictions_y1_generation + # pyrefly: ignore [missing-attribute] predictions = self._analysis_input.predictions if self.score_computation_function is not None: @@ -142,6 +145,7 @@ def compute_scores(self, i: int) -> Tuple[torch.Tensor, torch.Tensor]: np.log(prob_train + 1e-8) - np.log(prob_reconstruct + 1e-8) ) * prob_diff_label**self._power + # pyrefly: ignore [missing-attribute] true_bits = self._analysis_input.true_bits[i] scores_train = torch.tensor(scores[true_bits == 0]) scores_test = torch.tensor(scores[true_bits == 1]) @@ -152,7 +156,9 @@ def run_analysis(self) -> BaseAnalysisOutput: """Run LIA analysis""" error_thresholds = np.linspace(0.01, 1, 100) + # pyrefly: ignore [missing-attribute] num_resampling = self._analysis_input.y1.shape[0] + # pyrefly: ignore [missing-attribute] num_samples = self._analysis_input.y1.shape[1] # run analysis for each game instance @@ -221,9 +227,12 @@ def run_analysis(self) -> BaseAnalysisOutput: eps_at_tpr_bounds=(list(eps_tpr_lb), list(eps_tpr_ub)), eps_at_fpr_bounds=(list(eps_fpr_lb), list(eps_fpr_ub)), data_size=num_samples, + # pyrefly: ignore [missing-attribute] label_mean=np.mean(self._analysis_input.y0), + # pyrefly: ignore [missing-attribute] prediction_mean=np.mean(self._analysis_input.predictions), prediction_y1_generation_mean=np.mean( + # pyrefly: ignore [missing-attribute] self._analysis_input.predictions_y1_generation ), ) diff --git a/privacy_guard/analysis/mia/analysis_node.py b/privacy_guard/analysis/mia/analysis_node.py index 4a55ed3..d151794 100644 --- a/privacy_guard/analysis/mia/analysis_node.py +++ b/privacy_guard/analysis/mia/analysis_node.py @@ -235,6 +235,7 @@ def _calculate_one_off_eps(self) -> float: return eps_cp @staticmethod + # pyrefly: ignore [bad-specialization] def _compute_ci(array: NDArray[float], axis: int = 0) -> tuple[NDArray, NDArray]: """Compute confidence intervals (used for eps, auc, accuracy)""" # Sort along the specified axis @@ -269,6 +270,7 @@ def _compute_bootstrap_sample_indexes( Returns: A list of indexes (with duplicates) """ + # pyrefly: ignore [bad-return] return np.random.randint(0, num_users, sample_size) def run_analysis(self) -> BaseAnalysisOutput: diff --git a/privacy_guard/analysis/mia/fdp_analysis_node.py b/privacy_guard/analysis/mia/fdp_analysis_node.py index b289f09..5900705 100644 --- a/privacy_guard/analysis/mia/fdp_analysis_node.py +++ b/privacy_guard/analysis/mia/fdp_analysis_node.py @@ -17,6 +17,8 @@ import numpy as np from privacy_guard.analysis.base_analysis_node import BaseAnalysisNode from privacy_guard.analysis.base_analysis_output import BaseAnalysisOutput + +# pyrefly: ignore [missing-module-attribute] from scipy.stats import norm diff --git a/privacy_guard/analysis/mia/fpr_lower_bound_analysis_node.py b/privacy_guard/analysis/mia/fpr_lower_bound_analysis_node.py index 5bd1cbb..4887b5b 100644 --- a/privacy_guard/analysis/mia/fpr_lower_bound_analysis_node.py +++ b/privacy_guard/analysis/mia/fpr_lower_bound_analysis_node.py @@ -65,6 +65,7 @@ class FPRLowerBoundAnalysisNodeOutput(BaseAnalysisOutput): def compute_metric_mean_with_ci( + # pyrefly: ignore [bad-specialization] metric_array: NDArray[float], ) -> tuple[float, float, float]: # TODO: Identify descriptive values for mean, lb, ub when bootstrap fails @@ -181,6 +182,7 @@ def run_analysis(self) -> BaseAnalysisOutput: return outputs + # pyrefly: ignore [bad-specialization] def _make_acc_auc_epsilon_array(self) -> NDArray[float]: """ Make list of tuples metrics at error thresholds, each of which contains the diff --git a/privacy_guard/analysis/mia/mia_results.py b/privacy_guard/analysis/mia/mia_results.py index 7baed9c..fe2bae0 100644 --- a/privacy_guard/analysis/mia/mia_results.py +++ b/privacy_guard/analysis/mia/mia_results.py @@ -15,6 +15,8 @@ import numpy as np import torch from numpy.typing import NDArray + +# pyrefly: ignore [missing-module-attribute] from scipy.stats import beta from sklearn.metrics import auc, roc_curve @@ -41,9 +43,12 @@ def __init__(self, scores_train: torch.Tensor, scores_test: torch.Tensor) -> Non def _get_indices_of_error_at_thresholds( self, + # pyrefly: ignore [bad-specialization] error_rates: NDArray[float], + # pyrefly: ignore [bad-specialization] error_thresholds: NDArray[float], error_type: str, + # pyrefly: ignore [bad-specialization] ) -> NDArray[int]: """ Get indices where error values are greater/smaller than error thresholds. @@ -80,6 +85,7 @@ def _get_indices_of_error_at_thresholds( else: raise ValueError(f"Invalid error type: {error_type}") + # pyrefly: ignore [bad-specialization] def get_tpr_fpr(self) -> tuple[NDArray[float], NDArray[float]]: """ Computes true positive rate and true negative rate given scores and labels indicating membership. @@ -213,6 +219,7 @@ def compute_acc_auc_ci_epsilon(self, delta: float) -> tuple[float, float, float] def compute_metrics_at_error_threshold( self, delta: float, + # pyrefly: ignore [bad-specialization] error_threshold: NDArray[float], cap_eps: bool = True, verbose: bool = False, diff --git a/privacy_guard/analysis/mia/parallel_analysis_node.py b/privacy_guard/analysis/mia/parallel_analysis_node.py index c3980a5..53bed6e 100644 --- a/privacy_guard/analysis/mia/parallel_analysis_node.py +++ b/privacy_guard/analysis/mia/parallel_analysis_node.py @@ -123,6 +123,7 @@ def _compute_metrics_array( f"An exception occurred when computing acc/auc/epsilon metrics: {e}" ) + # pyrefly: ignore [bad-return] return metrics_results def _parallel_compute_chunk_sizes(self, task_num: int) -> list[int]: diff --git a/privacy_guard/analysis/mia/parallel_fpr_lower_bound_analysis_node.py b/privacy_guard/analysis/mia/parallel_fpr_lower_bound_analysis_node.py index 44dc278..b0d4efd 100644 --- a/privacy_guard/analysis/mia/parallel_fpr_lower_bound_analysis_node.py +++ b/privacy_guard/analysis/mia/parallel_fpr_lower_bound_analysis_node.py @@ -128,6 +128,7 @@ def _compute_metrics_and_eps_fpr_array( f"An exception occurred when computing acc/auc/epsilon metrics: {e}" ) + # pyrefly: ignore [bad-return] return metrics_results, eps_fpr_results def _parallel_compute_chunk_sizes(self, task_num: int) -> list[int]: diff --git a/privacy_guard/analysis/scripts/probabilistic_memorization_analysis.py b/privacy_guard/analysis/scripts/probabilistic_memorization_analysis.py index aa72bd8..b245537 100644 --- a/privacy_guard/analysis/scripts/probabilistic_memorization_analysis.py +++ b/privacy_guard/analysis/scripts/probabilistic_memorization_analysis.py @@ -30,6 +30,7 @@ def dump_augmented_df(df: pd.DataFrame, jsonl_output_path: str) -> None: jsonl_data = df.to_json(orient="records", lines=True) with open(jsonl_output_path, "w") as f: + # pyrefly: ignore [bad-argument-type] f.write(jsonl_data) diff --git a/privacy_guard/analysis/scripts/reference_model_comparison.py b/privacy_guard/analysis/scripts/reference_model_comparison.py index a62ddd7..7c06820 100644 --- a/privacy_guard/analysis/scripts/reference_model_comparison.py +++ b/privacy_guard/analysis/scripts/reference_model_comparison.py @@ -32,6 +32,7 @@ def dump_augmented_df(df: pd.DataFrame, jsonl_output_path: str) -> None: jsonl_data = df.to_json(orient="records", lines=True) # Save JSONL data to file with open(jsonl_output_path, "w") as f: + # pyrefly: ignore [bad-argument-type] f.write(jsonl_data) diff --git a/privacy_guard/analysis/scripts/tests/test_reference_model_comparison_script.py b/privacy_guard/analysis/scripts/tests/test_reference_model_comparison_script.py index 483900e..fc1d4c5 100644 --- a/privacy_guard/analysis/scripts/tests/test_reference_model_comparison_script.py +++ b/privacy_guard/analysis/scripts/tests/test_reference_model_comparison_script.py @@ -40,6 +40,7 @@ def setUp(self) -> None: prefix="test_target", suffix=".jsonl", mode="w" ) self.temp_target_file_name = self.temp_target_file.name + # pyrefly: ignore [no-matching-overload] self.temp_target_file.write( pd.DataFrame(self.target_data).to_json(orient="records", lines=True) ) @@ -50,6 +51,7 @@ def setUp(self) -> None: prefix="test_reference", suffix=".jsonl", mode="w" ) self.temp_reference_file_name = self.temp_reference_file.name + # pyrefly: ignore [no-matching-overload] self.temp_reference_file.write( pd.DataFrame(self.reference_data).to_json(orient="records", lines=True) ) @@ -112,6 +114,7 @@ def test_run_comparison_analysis_custom_key(self) -> None: prefix="test_target_custom", suffix=".jsonl", mode="w" ) temp_target_file_name = temp_target_file.name + # pyrefly: ignore [no-matching-overload] temp_target_file.write( pd.DataFrame(target_data).to_json(orient="records", lines=True) ) @@ -122,6 +125,7 @@ def test_run_comparison_analysis_custom_key(self) -> None: prefix="test_reference_custom", suffix=".jsonl", mode="w" ) temp_reference_file_name = temp_reference_file.name + # pyrefly: ignore [no-matching-overload] temp_reference_file.write( pd.DataFrame(reference_data).to_json(orient="records", lines=True) ) diff --git a/privacy_guard/analysis/scripts/tests/test_text_inclusion_metrics.py b/privacy_guard/analysis/scripts/tests/test_text_inclusion_metrics.py index 638593d..681b13e 100644 --- a/privacy_guard/analysis/scripts/tests/test_text_inclusion_metrics.py +++ b/privacy_guard/analysis/scripts/tests/test_text_inclusion_metrics.py @@ -82,6 +82,7 @@ def setUp(self) -> None: prefix="test_input", suffix=".jsonl", mode="w" ) self.temp_input_file_name = self.temp_input_file.name + # pyrefly: ignore [no-matching-overload] self.temp_input_file.write( pd.DataFrame(self.data).to_json(orient="records", lines=True) ) @@ -92,6 +93,7 @@ def setUp(self) -> None: prefix="test_input", suffix=".jsonl", mode="w" ) self.temp_sft_input_file_name = self.temp_sft_input_file.name + # pyrefly: ignore [no-matching-overload] self.temp_sft_input_file.write( pd.DataFrame(self.sft_data).to_json(orient="records", lines=True) ) diff --git a/privacy_guard/analysis/scripts/text_inclusion_metrics.py b/privacy_guard/analysis/scripts/text_inclusion_metrics.py index 9a1c6cd..d5f99b3 100644 --- a/privacy_guard/analysis/scripts/text_inclusion_metrics.py +++ b/privacy_guard/analysis/scripts/text_inclusion_metrics.py @@ -189,6 +189,7 @@ def dump_augmented_df(df: pd.DataFrame, jsonl_output_path: str) -> None: jsonl_data = df.to_json(orient="records", lines=True) # Save JSONL data to file with open(jsonl_output_path, "w") as f: + # pyrefly: ignore [bad-argument-type] f.write(jsonl_data) diff --git a/privacy_guard/analysis/tests/base_test_analysis_node.py b/privacy_guard/analysis/tests/base_test_analysis_node.py index 876d934..7fd1b6b 100644 --- a/privacy_guard/analysis/tests/base_test_analysis_node.py +++ b/privacy_guard/analysis/tests/base_test_analysis_node.py @@ -92,6 +92,7 @@ def setUp(self) -> None: super().setUp() def get_long_dataframes(self) -> Tuple[pd.DataFrame, pd.DataFrame]: + # pyrefly: ignore [bad-argument-type] np.random.seed(0) df_train_user_long = self.sample_normal_distribution(0.5, 0.1, 10000) df_test_user_long = self.sample_normal_distribution(0.5, 0.1, 10000) diff --git a/privacy_guard/analysis/tests/test_analysis_node.py b/privacy_guard/analysis/tests/test_analysis_node.py index 23ad7ae..051062b 100644 --- a/privacy_guard/analysis/tests/test_analysis_node.py +++ b/privacy_guard/analysis/tests/test_analysis_node.py @@ -392,6 +392,7 @@ def test_use_fnr_tnr_parameter_true(self) -> None: def test_use_fnr_tnr_parameter_comparison(self) -> None: """Test comparison between use_fnr_tnr=False and use_fnr_tnr=True""" # Set random seed to ensure deterministic bootstrap sampling + # pyrefly: ignore [bad-argument-type] np.random.seed(42) # Test with use_fnr_tnr=False @@ -406,6 +407,7 @@ def test_use_fnr_tnr_parameter_comparison(self) -> None: outputs_false = analysis_node_false.compute_outputs() # Reset seed to ensure same bootstrap sampling for the second run + # pyrefly: ignore [bad-argument-type] np.random.seed(42) # Test with use_fnr_tnr=True diff --git a/privacy_guard/analysis/tests/test_fdp_analysis_node.py b/privacy_guard/analysis/tests/test_fdp_analysis_node.py index 566ca3b..7293f5c 100644 --- a/privacy_guard/analysis/tests/test_fdp_analysis_node.py +++ b/privacy_guard/analysis/tests/test_fdp_analysis_node.py @@ -18,6 +18,8 @@ FDPAnalysisNode, FDPAnalysisNodeOutput, ) + +# pyrefly: ignore [missing-module-attribute] from scipy.stats import norm diff --git a/privacy_guard/analysis/tests/test_lia_analysis_node.py b/privacy_guard/analysis/tests/test_lia_analysis_node.py index a5d154e..4bb4570 100644 --- a/privacy_guard/analysis/tests/test_lia_analysis_node.py +++ b/privacy_guard/analysis/tests/test_lia_analysis_node.py @@ -34,6 +34,7 @@ def setUp(self) -> None: self.num_resampling = 5 # Generate base data + # pyrefly: ignore [bad-argument-type] np.random.seed(42) # For reproducible tests self.predictions = np.random.uniform(0.1, 0.9, self.num_samples) self.y1_preds = np.random.uniform(0.1, 0.9, self.num_samples) diff --git a/privacy_guard/attacks/code_similarity/code_bleu_attack.py b/privacy_guard/attacks/code_similarity/code_bleu_attack.py index 9a8965b..092b4d8 100644 --- a/privacy_guard/attacks/code_similarity/code_bleu_attack.py +++ b/privacy_guard/attacks/code_similarity/code_bleu_attack.py @@ -139,7 +139,9 @@ def run_attack(self) -> CodeBleuAnalysisInput: if lang not in AVAILABLE_LANGS: raise ValueError(f"Language {lang} not supported by CodeBLEU.") tree_sitter_language = Language( - importlib.resources.files("codebleu") / "my-languages.so", lang + # pyrefly: ignore [bad-argument-type] + importlib.resources.files("codebleu") / "my-languages.so", + lang, ) # pyre-ignore[16]: Module `tree_sitter` has no attribute `Parser`. parser = Parser() diff --git a/privacy_guard/attacks/extraction/predictors/gpt_oss_predictor.py b/privacy_guard/attacks/extraction/predictors/gpt_oss_predictor.py index fcb5083..37299aa 100644 --- a/privacy_guard/attacks/extraction/predictors/gpt_oss_predictor.py +++ b/privacy_guard/attacks/extraction/predictors/gpt_oss_predictor.py @@ -102,7 +102,9 @@ def preprocess_batch_messages(self, batch: List[str]) -> List[Dict[str, str]]: raise Warning(f"Found non-string item in batch: {type(item)}") clean_batch.append(str(item) if item is not None else "") else: + # pyrefly: ignore [bad-argument-type] clean_batch.append({"role": "user", "content": item}) + # pyrefly: ignore [bad-return] return clean_batch # Override diff --git a/privacy_guard/attacks/extraction/utils/tests/test_data_utils.py b/privacy_guard/attacks/extraction/utils/tests/test_data_utils.py index c9aa99a..d704231 100644 --- a/privacy_guard/attacks/extraction/utils/tests/test_data_utils.py +++ b/privacy_guard/attacks/extraction/utils/tests/test_data_utils.py @@ -40,6 +40,7 @@ def setUp(self) -> None: Callable[[pd.DataFrame, str], None], Callable[[str], pd.DataFrame], ] + # pyrefly: ignore [bad-assignment] ] = [ ( "jsonl", diff --git a/privacy_guard/attacks/lira_attack.py b/privacy_guard/attacks/lira_attack.py index 311847e..4c42ea1 100644 --- a/privacy_guard/attacks/lira_attack.py +++ b/privacy_guard/attacks/lira_attack.py @@ -23,6 +23,8 @@ AggregationType, ) from privacy_guard.attacks.base_attack import BaseAttack + +# pyrefly: ignore [missing-module-attribute] from scipy.stats import norm logger: logging.Logger = logging.getLogger(__name__) diff --git a/privacy_guard/attacks/tests/test_code_bleu_attack.py b/privacy_guard/attacks/tests/test_code_bleu_attack.py index 53cbcaa..6613f11 100644 --- a/privacy_guard/attacks/tests/test_code_bleu_attack.py +++ b/privacy_guard/attacks/tests/test_code_bleu_attack.py @@ -35,7 +35,9 @@ # pyre-ignore[11]: Annotation `Parser` is not defined as a type def _make_parser(language: str) -> Parser: tree_sitter_language = Language( - importlib.resources.files("codebleu") / "my-languages.so", language + # pyrefly: ignore [bad-argument-type] + importlib.resources.files("codebleu") / "my-languages.so", + language, ) # pyre-ignore[16]: Module `tree_sitter` has no attribute `Parser` parser = Parser() diff --git a/privacy_guard/attacks/tests/test_lia_attack.py b/privacy_guard/attacks/tests/test_lia_attack.py index 83c53ac..3c3bf40 100644 --- a/privacy_guard/attacks/tests/test_lia_attack.py +++ b/privacy_guard/attacks/tests/test_lia_attack.py @@ -279,6 +279,7 @@ def test_get_y1_predictions_reference(self) -> None: predictions_y1 = lia_attack.get_y1_predictions(df_with_reference) + # pyrefly: ignore [missing-attribute] expected_predictions = df_with_reference["predictions_reference"].values assert_array_equal(predictions_y1, expected_predictions) diff --git a/privacy_guard/attacks/tests/test_lira_attack.py b/privacy_guard/attacks/tests/test_lira_attack.py index 6047685..cc65f19 100644 --- a/privacy_guard/attacks/tests/test_lira_attack.py +++ b/privacy_guard/attacks/tests/test_lira_attack.py @@ -66,12 +66,15 @@ def setUp(self) -> None: "score_std_in": {"0": 0.1, "1": 0.15, "2": 0.2, "3": 0.25, "4": 0.3}, "score_std_out": {"0": 0.05, "1": 0.1, "2": 0.15, "3": 0.2, "4": 0.25}, } + # pyrefly: ignore [bad-assignment] self.df_train_merge = pd.DataFrame.from_dict(self.df_train_merge) self.user_id_key = "user_id" self.lira_attack = LiraAttack( + # pyrefly: ignore [bad-argument-type] df_train_merge=self.df_train_merge, + # pyrefly: ignore [bad-argument-type] df_test_merge=self.df_train_merge, row_aggregation=AggregationType.MAX, use_fixed_variance=True, @@ -79,7 +82,9 @@ def setUp(self) -> None: ) self.lira_attack_no_fixed_variance = LiraAttack( + # pyrefly: ignore [bad-argument-type] df_train_merge=self.df_train_merge, + # pyrefly: ignore [bad-argument-type] df_test_merge=self.df_train_merge, row_aggregation=AggregationType.MAX, use_fixed_variance=False, @@ -108,7 +113,9 @@ def test_get_std_dev_global(self) -> None: """Test _get_std_dev with std_dev_type='global'""" # Setup attack = LiraAttack( + # pyrefly: ignore [bad-argument-type] df_train_merge=self.df_train_merge, + # pyrefly: ignore [bad-argument-type] df_test_merge=self.df_train_merge, row_aggregation=AggregationType.MAX, std_dev_type="global", @@ -122,7 +129,9 @@ def test_get_std_dev_global(self) -> None: # Calculate expected standard deviation of all score_orig values expected_std = pd.concat( [ + # pyrefly: ignore [missing-attribute] self.df_train_merge.score_orig, + # pyrefly: ignore [missing-attribute] self.df_train_merge.score_orig, # df_test_merge is same as df_train_merge in this test ] ).std() @@ -137,7 +146,9 @@ def test_get_std_dev_shadows_in_offline(self) -> None: """Test _get_std_dev with std_dev_type='shadows_in' and online_attack=False""" # Setup attack = LiraAttack( + # pyrefly: ignore [bad-argument-type] df_train_merge=self.df_train_merge, + # pyrefly: ignore [bad-argument-type] df_test_merge=self.df_train_merge, row_aggregation=AggregationType.MAX, std_dev_type="shadows_in", @@ -152,7 +163,9 @@ def test_get_std_dev_shadows_in_offline(self) -> None: # Calculate expected mean of all score_std values expected_std = pd.concat( [ + # pyrefly: ignore [missing-attribute] self.df_train_merge.score_std, + # pyrefly: ignore [missing-attribute] self.df_train_merge.score_std, # df_test_merge is same as df_train_merge in this test ] ).mean() @@ -167,7 +180,9 @@ def test_get_std_dev_shadows_in_online(self) -> None: """Test _get_std_dev with std_dev_type='shadows_in' and online_attack=True""" # Setup attack = LiraAttack( + # pyrefly: ignore [bad-argument-type] df_train_merge=self.df_train_merge, + # pyrefly: ignore [bad-argument-type] df_test_merge=self.df_train_merge, row_aggregation=AggregationType.MAX, std_dev_type="shadows_in", @@ -182,7 +197,9 @@ def test_get_std_dev_shadows_in_online(self) -> None: # Calculate expected mean of all score_std_in values expected_std = pd.concat( [ + # pyrefly: ignore [missing-attribute] self.df_train_merge.score_std_in, + # pyrefly: ignore [missing-attribute] self.df_train_merge.score_std_in, # df_test_merge is same as df_train_merge in this test ] ).mean() @@ -197,7 +214,9 @@ def test_get_std_dev_shadows_out_offline(self) -> None: """Test _get_std_dev with std_dev_type='shadows_out' and online_attack=False""" # Setup attack = LiraAttack( + # pyrefly: ignore [bad-argument-type] df_train_merge=self.df_train_merge, + # pyrefly: ignore [bad-argument-type] df_test_merge=self.df_train_merge, row_aggregation=AggregationType.MAX, std_dev_type="shadows_out", @@ -212,7 +231,9 @@ def test_get_std_dev_shadows_out_offline(self) -> None: # Calculate expected mean of all score_std values expected_std = pd.concat( [ + # pyrefly: ignore [missing-attribute] self.df_train_merge.score_std, + # pyrefly: ignore [missing-attribute] self.df_train_merge.score_std, # df_test_merge is same as df_train_merge in this test ] ).mean() @@ -227,7 +248,9 @@ def test_get_std_dev_shadows_out_online(self) -> None: """Test _get_std_dev with std_dev_type='shadows_out' and online_attack=True""" # Setup attack = LiraAttack( + # pyrefly: ignore [bad-argument-type] df_train_merge=self.df_train_merge, + # pyrefly: ignore [bad-argument-type] df_test_merge=self.df_train_merge, row_aggregation=AggregationType.MAX, std_dev_type="shadows_out", @@ -242,7 +265,9 @@ def test_get_std_dev_shadows_out_online(self) -> None: # Calculate expected mean of all score_std_out values expected_std = pd.concat( [ + # pyrefly: ignore [missing-attribute] self.df_train_merge.score_std_out, + # pyrefly: ignore [missing-attribute] self.df_train_merge.score_std_out, # df_test_merge is same as df_train_merge in this test ] ).mean() @@ -257,7 +282,9 @@ def test_get_std_dev_mix(self) -> None: """Test _get_std_dev with std_dev_type='mix' and online_attack=True""" # Setup attack = LiraAttack( + # pyrefly: ignore [bad-argument-type] df_train_merge=self.df_train_merge, + # pyrefly: ignore [bad-argument-type] df_test_merge=self.df_train_merge, row_aggregation=AggregationType.MAX, std_dev_type="mix", @@ -272,7 +299,9 @@ def test_get_std_dev_mix(self) -> None: # Calculate expected mean of all score_std_in values for std_in expected_std_in = pd.concat( [ + # pyrefly: ignore [missing-attribute] self.df_train_merge.score_std_in, + # pyrefly: ignore [missing-attribute] self.df_train_merge.score_std_in, # df_test_merge is same as df_train_merge in this test ] ).mean() @@ -280,7 +309,9 @@ def test_get_std_dev_mix(self) -> None: # Calculate expected mean of all score_std_out values for std_out expected_std_out = pd.concat( [ + # pyrefly: ignore [missing-attribute] self.df_train_merge.score_std_out, + # pyrefly: ignore [missing-attribute] self.df_train_merge.score_std_out, # df_test_merge is same as df_train_merge in this test ] ).mean() @@ -295,7 +326,9 @@ def test_get_std_dev_mix_offline_error(self) -> None: """Test _get_std_dev with std_dev_type='mix' and online_attack=False raises ValueError""" # Setup attack = LiraAttack( + # pyrefly: ignore [bad-argument-type] df_train_merge=self.df_train_merge, + # pyrefly: ignore [bad-argument-type] df_test_merge=self.df_train_merge, row_aggregation=AggregationType.MAX, std_dev_type="mix", @@ -316,7 +349,9 @@ def test_get_std_dev_invalid_type(self) -> None: """Test _get_std_dev with invalid std_dev_type raises ValueError""" # Setup attack = LiraAttack( + # pyrefly: ignore [bad-argument-type] df_train_merge=self.df_train_merge, + # pyrefly: ignore [bad-argument-type] df_test_merge=self.df_train_merge, row_aggregation=AggregationType.MAX, std_dev_type="invalid_type", @@ -333,11 +368,15 @@ def test_run_attack_drops_nan_rows_in_train(self) -> None: """Test that run_attack drops rows with NaN values in df_train_merge after logpdf computation.""" # Setup: create training data with NaN in score_orig so logpdf produces NaN df_train_with_nan = self.df_train_merge.copy() + # pyrefly: ignore [missing-attribute] df_train_with_nan.loc["0", "score_orig"] = np.nan + # pyrefly: ignore [missing-attribute] df_train_with_nan.loc["2", "score_orig"] = np.nan attack = LiraAttack( + # pyrefly: ignore [bad-argument-type] df_train_merge=df_train_with_nan, + # pyrefly: ignore [bad-argument-type] df_test_merge=self.df_train_merge, row_aggregation=AggregationType.MAX, use_fixed_variance=True, @@ -358,10 +397,13 @@ def test_run_attack_drops_nan_rows_in_test(self) -> None: """Test that run_attack drops rows with NaN values in df_test_merge after logpdf computation.""" # Setup: create test data with NaN in score_orig so logpdf produces NaN df_test_with_nan = self.df_train_merge.copy() + # pyrefly: ignore [missing-attribute] df_test_with_nan.loc["1", "score_orig"] = np.nan attack = LiraAttack( + # pyrefly: ignore [bad-argument-type] df_train_merge=self.df_train_merge, + # pyrefly: ignore [bad-argument-type] df_test_merge=df_test_with_nan, row_aggregation=AggregationType.MAX, use_fixed_variance=True, @@ -381,11 +423,15 @@ def test_run_attack_drops_nan_rows_online_attack(self) -> None: """Test that run_attack drops NaN rows for online attack mode.""" # Setup: create data with NaN in score_mean_in to produce NaN in logpdf df_train_with_nan = self.df_train_merge.copy() + # pyrefly: ignore [missing-attribute] df_train_with_nan.loc["0", "score_mean_in"] = np.nan + # pyrefly: ignore [missing-attribute] df_train_with_nan.loc["3", "score_mean_out"] = np.nan attack = LiraAttack( + # pyrefly: ignore [bad-argument-type] df_train_merge=df_train_with_nan, + # pyrefly: ignore [bad-argument-type] df_test_merge=self.df_train_merge, row_aggregation=AggregationType.MAX, use_fixed_variance=True, @@ -406,7 +452,9 @@ def test_run_attack_no_nan_preserves_all_rows(self) -> None: """Test that run_attack preserves all rows when no NaN values are present.""" # Setup: use clean data (no NaN) attack = LiraAttack( + # pyrefly: ignore [bad-argument-type] df_train_merge=self.df_train_merge, + # pyrefly: ignore [bad-argument-type] df_test_merge=self.df_train_merge, row_aggregation=AggregationType.MAX, use_fixed_variance=True, diff --git a/privacy_guard/shadow_model_training/dataset.py b/privacy_guard/shadow_model_training/dataset.py index 6fcbbce..56c401b 100644 --- a/privacy_guard/shadow_model_training/dataset.py +++ b/privacy_guard/shadow_model_training/dataset.py @@ -25,6 +25,8 @@ from torch.utils.data import Dataset, Subset from torchvision import transforms from torchvision.datasets import CIFAR10 + +# pyrefly: ignore [missing-module-attribute] from typing_extensions import Sized @@ -111,6 +113,7 @@ def num_classes(self) -> int: @property def input_shape(self) -> torch.Size: """Return the shape of a single data sample (excluding batch dim).""" + # pyrefly: ignore [bad-return] return self.data.shape[1:] @@ -184,6 +187,7 @@ def create_shadow_datasets( Returns: List of (train_subsets, keep) tuples for each shadow model and the target model """ + # pyrefly: ignore [bad-argument-type] np.random.seed(seed) dataset_size = len(cast(Sized, train_dataset)) diff --git a/privacy_guard/shadow_model_training/tests/test_dataset.py b/privacy_guard/shadow_model_training/tests/test_dataset.py index bd2571d..3d325f3 100644 --- a/privacy_guard/shadow_model_training/tests/test_dataset.py +++ b/privacy_guard/shadow_model_training/tests/test_dataset.py @@ -39,6 +39,7 @@ def __init__(self, size: int = 100) -> None: def __len__(self) -> int: return self.size + # pyrefly: ignore [bad-param-name-override] def __getitem__(self, idx: int) -> tuple[torch.Tensor, torch.Tensor]: return self.data[idx], self.targets[idx] diff --git a/privacy_guard/shadow_model_training/tests/test_training.py b/privacy_guard/shadow_model_training/tests/test_training.py index dfcbcf0..755f569 100644 --- a/privacy_guard/shadow_model_training/tests/test_training.py +++ b/privacy_guard/shadow_model_training/tests/test_training.py @@ -109,6 +109,7 @@ def test_get_transformed_logits(self) -> None: self.assertIsInstance(logits, np.ndarray) # The shape should be (num_samples, 1) since we're returning likelihood ratios + # pyrefly: ignore [bad-argument-type] expected_samples = len(self.test_loader.dataset) self.assertEqual(logits.shape, (expected_samples, 1)) @@ -182,6 +183,7 @@ def test_get_softmax_scores(self) -> None: # Check that scores is a numpy array with the expected shape self.assertIsInstance(scores, np.ndarray) + # pyrefly: ignore [bad-argument-type] expected_samples = len(self.test_loader.dataset) self.assertEqual(scores.shape, (expected_samples,))