Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
7049f93
Update dependency versions in pyproject.toml
KartikP Feb 11, 2026
27f9d2e
correct file changes
KartikP Feb 11, 2026
b913b91
update sklearn to be greater than 1.6 (inconsistent with vision but a…
KartikP Feb 11, 2026
3fa2a36
Merge branch 'main' into epflneuroailab-changes
KartikP Feb 11, 2026
4f6bf08
restore plugin submission orchestrator
KartikP Feb 11, 2026
deb1899
import numpy
KartikP Feb 11, 2026
1a544ae
Add layer to dummy model neuroidassembly
KartikP Feb 12, 2026
6a65f52
return score object with mean of list of score
KartikP Feb 12, 2026
6738606
Update blank2014 test_ceiling expected value for GroupKFold CV
KartikP Feb 12, 2026
8090665
Propagate split_coord to predictions and fix MultiIndex detection
KartikP Feb 12, 2026
9adee94
remove cv_kwargs for linear
KartikP Feb 12, 2026
dbe92a8
account for difference in pearsonr correlation methods
KartikP Feb 12, 2026
16e896a
Propagate raw and ceiling values for database
KartikP Feb 12, 2026
cf37b02
make identifier format consistent across ridge variants
KartikP Feb 12, 2026
54ba7cb
Merge branch 'main' into epflneuroailab-changes
KartikP Feb 17, 2026
a9def6b
Merge branch 'main' into epflneuroailab-changes
KartikP Feb 18, 2026
019823c
update ridge ceilings
KartikP Feb 18, 2026
8b547d5
handle nan and notebook
KartikP Feb 18, 2026
8aca5b6
rename linear benchmarks to -linear-shuffle
KartikP May 15, 2026
87e8cc7
Merge remote-tracking branch 'origin/main' into epflneuroailab-changes
KartikP May 15, 2026
e46791e
Auto-generate: metadata generation: models (random_embedding), benchm…
github-actions[bot] May 15, 2026
7b82251
wire ridge ceilings to s3
KartikP May 15, 2026
7d8326a
drop stale version_ids from linear-shuffle ceiling kwargs
KartikP May 15, 2026
dd5ebbd
guard Blank2014/Fedorenko ceiling fit against curve_fit failures
KartikP May 15, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions brainscore_language/benchmarks/blank2014/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from brainscore_language import benchmark_registry
from .benchmark import Blank2014Linear
from .benchmark import Blank2014_ridge, Blank2014_linear_shuffle

benchmark_registry['Blank2014-linear'] = Blank2014Linear
benchmark_registry['Blank2014-ridge'] = Blank2014_ridge
benchmark_registry['Blank2014-linear-shuffle'] = Blank2014_linear_shuffle
50 changes: 43 additions & 7 deletions brainscore_language/benchmarks/blank2014/benchmark.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import numpy as np
import xarray as xr

from brainscore_core.benchmarks import BenchmarkBase
Expand All @@ -9,7 +10,28 @@
from brainscore_language.utils.ceiling import ceiling_normalize


class Blank2014Linear(BenchmarkBase):
def Blank2014_ridge():
    """Build the Blank2014 benchmark scored with the ``ridge_pearsonr`` metric.

    The metric is loaded with cross-validation settings of 8 splits,
    ``kfold="group"`` over the ``story`` coordinate, and a fixed random
    state of 1234.
    """
    cv_settings = {
        "splits": 8,
        "split_coord": "story",
        "kfold": "group",
        "random_state": 1234,
    }
    return Blank2014(metric="ridge_pearsonr", cross_validation_kwargs=cv_settings)

def Blank2014_linear_shuffle():
    """Build the Blank2014 benchmark scored with the ``linear_pearsonr`` metric.

    The ``"-shuffle"`` identifier suffix is passed through to the benchmark.
    Cross-validation uses 10 splits with ``train_size=0.9``, ``kfold=False``
    and a fixed random state of 1.
    """
    cv_settings = {
        "splits": 10,
        "train_size": 0.9,
        "kfold": False,
        "random_state": 1,
    }
    return Blank2014(
        metric="linear_pearsonr",
        identifier_suffix="-shuffle",
        cross_validation_kwargs=cv_settings,
    )

class Blank2014(BenchmarkBase):
"""
Evaluate model ability to predict neural activity in human language system functional regions of interest (fROIs)
in response to natural stories, recorded by Blank et al. 2014.
Expand All @@ -20,13 +42,13 @@ class Blank2014Linear(BenchmarkBase):
(e.g. "layer 41 corresponds to the language system"), rather than testing every layer separately.
"""

def __init__(self):
def __init__(self, metric: str, cross_validation_kwargs=None, identifier_suffix: str = ""):
self.data = load_dataset('Blank2014.fROI')
self.metric = load_metric('linear_pearsonr')
self.metric = load_metric(metric, crossvalidation_kwargs=cross_validation_kwargs)
ceiler = ExtrapolationCeiling()
ceiling = ceiler(assembly=self.data, metric=self.metric)
super(Blank2014Linear, self).__init__(
identifier='Blank2014-linear',
super(Blank2014, self).__init__(
identifier=f"Blank2014-{metric.split('_')[0]}{identifier_suffix}",
version=1,
parent='neural_language',
ceiling=ceiling,
Expand All @@ -43,8 +65,22 @@ def __call__(self, candidate: ArtificialSubject) -> Score:
story_stimuli = stimuli[story_indexer]
story_predictions = candidate.digest_text(story_stimuli.values)['neural']
story_predictions['stimulus_id'] = 'presentation', story_stimuli['stimulus_id'].values
try:
story_predictions['story']
except KeyError:
story_predictions['story'] = 'presentation', story_stimuli['story'].values
predictions.append(story_predictions)

predictions = xr.concat(predictions, dim='presentation')
raw_score = self.metric(predictions, self.data)
score = ceiling_normalize(raw_score, self.ceiling)
layer_names = np.unique(predictions['layer'].data)
layer_names = [layer_names] if isinstance(layer_names, str) else layer_names
layer_scores = {}
for layer_name in layer_names:
raw_score = self.metric(predictions.sel(layer=layer_name), self.data)
layer_scores[layer_name] = ceiling_normalize(raw_score, self.ceiling)

score = Score(np.mean(list(layer_scores.values())))
score.attrs['layer_scores'] = layer_scores
score.attrs['raw'] = Score(np.mean([s.attrs['raw'] for s in layer_scores.values()]))
score.attrs['ceiling'] = self.ceiling
return score
12 changes: 10 additions & 2 deletions brainscore_language/benchmarks/blank2014/ceiling.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,10 @@ def extrapolate_neuroid(self, ceilings):
bootstrapped_score = rng.choice(choices, size=len(choices), replace=True)
bootstrapped_scores.append(np.mean(bootstrapped_score))

params = self.fit(subject_subsamples, bootstrapped_scores)
try:
params = self.fit(subject_subsamples, bootstrapped_scores)
except (RuntimeError, ValueError): # optimal parameters not found or NaN inputs
params = [np.nan, np.nan]
params = DataAssembly([params], coords={'bootstrap': [bootstrap], 'param': ['v0', 'tau0']},
dims=['bootstrap', 'param'])
bootstrap_params.append(params)
Expand All @@ -148,7 +151,12 @@ def extrapolate_neuroid(self, ceilings):
return score

def fit(self, subject_subsamples, bootstrapped_scores):
    """Fit the extrapolation curve ``v`` to the bootstrapped scores.

    Non-finite scores (NaN or +/-inf) are dropped together with their
    matching subsample sizes before fitting.

    :param subject_subsamples: sequence of subsample sizes, one per score.
    :param bootstrapped_scores: sequence of scores aligned with
        ``subject_subsamples``.
    :return: the fitted parameter array from ``curve_fit``; the first
        parameter is bounded to [0, 1], the second is unbounded.
    :raises RuntimeError: if no finite score remains after filtering.
        Errors raised by ``curve_fit`` itself propagate to the caller.
    """
    subject_subsamples = np.asarray(subject_subsamples)
    bootstrapped_scores = np.asarray(bootstrapped_scores)
    # np.isfinite is False for NaN as well as +/-inf, so one mask covers both.
    valid = np.isfinite(bootstrapped_scores)
    if not valid.any():
        raise RuntimeError("No valid scores in sample")
    params, _ = curve_fit(v, subject_subsamples[valid], bootstrapped_scores[valid],
                          # v0 (i.e. max ceiling) is between 0 and 1, tau0 unconstrained
                          bounds=([0, -np.inf], [1, np.inf]))
    return params
Expand Down
7 changes: 7 additions & 0 deletions brainscore_language/benchmarks/blank2014/metadata.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
benchmarks:
blank2014:
stimulus_set: null
data_metadata: null
metric_information: null
brainscore_link: https://github.com/brain-score/language/tree/master/brainscore_language/benchmarks/blank2014
extra_notes: Temporary hardcoded metadata - will be replaced with actual generation
12 changes: 7 additions & 5 deletions brainscore_language/benchmarks/blank2014/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,18 +33,19 @@ def activity_for_text(stimuli: Union[str, List[str]]) -> NeuroidAssembly:
coords={'stimulus_seq': ('presentation', np.arange(num_stimuli)),
'stimulus_num': ('presentation', np.arange(num_stimuli)),
'neuroid_id': ('neuroid', np.arange(num_neuroids)),
'region': ('neuroid', ['some_region'] * num_neuroids)},
'region': ('neuroid', ['some_region'] * num_neuroids),
'layer': ('neuroid', ['test_layer'] * num_neuroids)},
dims=['presentation', 'neuroid'])
neural_activity['stimulus'] = 'presentation', stimuli # copy over
return neural_activity

benchmark = load_benchmark('Blank2014-linear')
benchmark = load_benchmark('Blank2014-linear-shuffle')
dummy_model = TestBenchmark.DummyModel(activity_for_text=activity_for_text)
score = benchmark(dummy_model)
assert score == 0

def test_exact(self):
benchmark = load_benchmark('Blank2014-linear')
benchmark = load_benchmark('Blank2014-linear-shuffle')
exact_data = copy.deepcopy(benchmark.data)

def activity_for_text(stimuli: Union[str, List[str]]) -> NeuroidAssembly:
Expand All @@ -53,6 +54,7 @@ def activity_for_text(stimuli: Union[str, List[str]]) -> NeuroidAssembly:
# remove stimulus_id and stimulus coordinates to not trip up benchmark
passage_activity = passage_activity.reset_index('presentation')
del passage_activity['stimulus_id']
passage_activity['layer'] = 'neuroid', ['test_layer'] * passage_activity.sizes['neuroid']
passage_activity = NeuroidAssembly(passage_activity) # index
return passage_activity

Expand All @@ -61,12 +63,12 @@ def activity_for_text(stimuli: Union[str, List[str]]) -> NeuroidAssembly:
assert score == approx(1)

def test_ceiling(self):
    """Check the ``Blank2014-linear-shuffle`` ceiling against its pinned value."""
    # Load the benchmark once; the identifier has nothing to interpolate,
    # so a plain string literal is used instead of an f-string.
    benchmark = load_benchmark('Blank2014-linear-shuffle')
    ceiling = benchmark.ceiling
    assert ceiling == approx(.21026591, abs=.0005)

def test_ceiling_raw(self):
benchmark = load_benchmark(f'Blank2014-linear')
benchmark = load_benchmark(f'Blank2014-linear-shuffle')
ceiling = benchmark.ceiling
assert hasattr(ceiling, 'raw')
assert set(ceiling.raw.dims) == {'neuroid'}
Expand Down
6 changes: 3 additions & 3 deletions brainscore_language/benchmarks/fedorenko2016/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from brainscore_language import benchmark_registry
from .benchmark import Fedorenko2016_ridge, Fedorenko2016_linear
from .benchmark import Fedorenko2016_ridge, Fedorenko2016_linear_shuffle

benchmark_registry['Fedorenko2016-linear'] = Fedorenko2016_linear
benchmark_registry['Fedorenko2016-ridge'] = Fedorenko2016_ridge
benchmark_registry['Fedorenko2016-ridge'] = Fedorenko2016_ridge
benchmark_registry['Fedorenko2016-linear-shuffle'] = Fedorenko2016_linear_shuffle
50 changes: 39 additions & 11 deletions brainscore_language/benchmarks/fedorenko2016/benchmark.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import numpy as np
import xarray as xr

from brainscore_core.benchmarks import BenchmarkBase
from brainscore_core.metrics import Score
from brainscore_language import load_dataset, load_metric
from brainscore_language.artificial_subject import ArtificialSubject
from brainscore_language.benchmarks.blank2014.ceiling import ExtrapolationCeiling
Expand All @@ -9,19 +11,34 @@

from tqdm import tqdm

def Fedorenko2016_linear():
    """Build the Fedorenko2016 benchmark scored with ``linear_pearsonr``.

    No cross-validation settings or identifier suffix are passed, so the
    benchmark's own defaults apply.
    """
    metric_name = "linear_pearsonr"
    return Fedorenko2016(metric=metric_name)

def Fedorenko2016_ridge():
    """Build the Fedorenko2016 benchmark scored with the ``ridge_pearsonr`` metric.

    The metric is loaded with ``kfold="group"`` over the ``sentence_id``
    coordinate and a fixed random state of 1234. No ``splits`` value is
    passed, so the metric's own default applies.
    """
    # Single return: an earlier bare `return Fedorenko2016(metric=...)` made
    # the cross-validation settings below unreachable.
    return Fedorenko2016(
        metric="ridge_pearsonr",
        cross_validation_kwargs=dict(
            split_coord="sentence_id",
            kfold="group",
            random_state=1234,
        ),
    )

def Fedorenko2016_linear_shuffle():
    """Build the Fedorenko2016 benchmark scored with the ``linear_pearsonr`` metric.

    The ``"-shuffle"`` identifier suffix is passed through to the benchmark.
    Cross-validation uses 10 splits with ``train_size=0.9``, ``kfold=False``
    and a fixed random state of 1.
    """
    cv_settings = {
        "splits": 10,
        "train_size": 0.9,
        "kfold": False,
        "random_state": 1,
    }
    return Fedorenko2016(
        metric="linear_pearsonr",
        identifier_suffix="-shuffle",
        cross_validation_kwargs=cv_settings,
    )

class Fedorenko2016(BenchmarkBase):

def __init__(self, metric: str):
def __init__(self, metric: str, cross_validation_kwargs=None, identifier_suffix: str = ""):
self.data = load_dataset('Fedorenko2016.language')
identifier = f"Fedorenko2016-{metric}"
self.metric = load_metric(metric)

identifier = f"Fedorenko2016-{metric.split('_')[0]}{identifier_suffix}"
self.metric = load_metric(metric, crossvalidation_kwargs=cross_validation_kwargs)

ceiler = ExtrapolationCeiling(subject_column="subject_UID")
ceiling = ceiler(assembly=self.data, metric=self.metric)
Expand All @@ -46,11 +63,22 @@ def __call__(self, candidate: ArtificialSubject):
stimuli_values = sentence_stimuli.values
sentence_predictions = candidate.digest_text(stimuli_values)["neural"]
sentence_predictions['stimulus_id'] = 'presentation', sentence_stimuli['stimulus_id'].values
try:
sentence_predictions['sentence_id']
except KeyError:
sentence_predictions['sentence_id'] = 'presentation', sentence_stimuli['sentence_id'].values
predictions.append(sentence_predictions)

predictions = xr.concat(predictions, dim='presentation')
layer_names = np.unique(predictions['layer'].data)
layer_names = [layer_names] if isinstance(layer_names, str) else layer_names
layer_scores = {}
for layer_name in layer_names:
raw_score = self.metric(predictions.sel(layer=layer_name), self.data)
layer_scores[layer_name] = ceiling_normalize(raw_score, self.ceiling)

raw_score = self.metric(predictions, self.data)
scores = ceiling_normalize(raw_score, self.ceiling)

return scores
score = Score(np.mean(list(layer_scores.values())))
score.attrs['layer_scores'] = layer_scores
score.attrs['raw'] = Score(np.mean([s.attrs['raw'] for s in layer_scores.values()]))
score.attrs['ceiling'] = self.ceiling
return score
7 changes: 7 additions & 0 deletions brainscore_language/benchmarks/fedorenko2016/metadata.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
benchmarks:
fedorenko2016:
stimulus_set: null
data_metadata: null
metric_information: null
brainscore_link: https://github.com/brain-score/language/tree/master/brainscore_language/benchmarks/fedorenko2016
extra_notes: Temporary hardcoded metadata - will be replaced with actual generation
10 changes: 7 additions & 3 deletions brainscore_language/benchmarks/pereira2018/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
from brainscore_language import benchmark_registry
from .benchmark import Pereira2018_243sentences, Pereira2018_384sentences
from .benchmark import Pereira2018_243sentences_ridge, Pereira2018_384sentences_ridge
from .benchmark import Pereira2018_243sentences_linear_shuffle, Pereira2018_384sentences_linear_shuffle

benchmark_registry['Pereira2018.243sentences-linear'] = Pereira2018_243sentences
benchmark_registry['Pereira2018.384sentences-linear'] = Pereira2018_384sentences
benchmark_registry['Pereira2018.243sentences-ridge'] = Pereira2018_243sentences_ridge
benchmark_registry['Pereira2018.384sentences-ridge'] = Pereira2018_384sentences_ridge

benchmark_registry['Pereira2018.243sentences-linear-shuffle'] = Pereira2018_243sentences_linear_shuffle
benchmark_registry['Pereira2018.384sentences-linear-shuffle'] = Pereira2018_384sentences_linear_shuffle
Loading
Loading